1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
32 static const char *const DataLayoutStringR600 =
33     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
34     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
35 
// LLVM data layout description used for AMDGCN triples. The pieces below are
// adjacent string literals and concatenate into a single string.
static const char *const DataLayoutStringAMDGCN =
    // Endianness followed by per-address-space pointer size/alignment.
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"                                           // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128"              // small vector alignments
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048" // large vector alignments
    "-n32:64-S32-A5-G1" // native widths, stack align, alloca/global addrspaces
    "-ni:7";            // non-integral pointer address space
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
80 #define BUILTIN(ID, TYPE, ATTRS)                                               \
81   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
83   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
84 #include "clang/Basic/BuiltinsAMDGPU.def"
85 };
86 
87 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
88   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
89   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
90   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
91   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
92   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
93   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
94   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
95   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
96   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
97   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
98   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
99   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
100   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
101   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
102   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
103   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
104   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
105   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
106   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
107   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
108   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
109   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
110   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
111   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
112   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
113   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
114   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
115   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
116   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
117   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
118   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
119   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
120   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
121   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
122   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
123   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
124   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
125   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
126   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
127   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
128   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
129   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
130   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
131   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
132   "flat_scratch_lo", "flat_scratch_hi",
133   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
134   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
135   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
136   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
137   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
138   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
139   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
140   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
141   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
142   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
143   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
144   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
145   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
146   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
147   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
148   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
149   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
150   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
151   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
152   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
153   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
154   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
155   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
156   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
157   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
158   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
159   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
160   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
161   "a252", "a253", "a254", "a255"
162 };
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1033:
178     case GK_GFX1032:
179     case GK_GFX1031:
180     case GK_GFX1030:
181       Features["ci-insts"] = true;
182       Features["dot1-insts"] = true;
183       Features["dot2-insts"] = true;
184       Features["dot5-insts"] = true;
185       Features["dot6-insts"] = true;
186       Features["dl-insts"] = true;
187       Features["flat-address-space"] = true;
188       Features["16-bit-insts"] = true;
189       Features["dpp"] = true;
190       Features["gfx8-insts"] = true;
191       Features["gfx9-insts"] = true;
192       Features["gfx10-insts"] = true;
193       Features["gfx10-3-insts"] = true;
194       Features["s-memrealtime"] = true;
195       break;
196     case GK_GFX1012:
197     case GK_GFX1011:
198       Features["dot1-insts"] = true;
199       Features["dot2-insts"] = true;
200       Features["dot5-insts"] = true;
201       Features["dot6-insts"] = true;
202       LLVM_FALLTHROUGH;
203     case GK_GFX1010:
204       Features["dl-insts"] = true;
205       Features["ci-insts"] = true;
206       Features["flat-address-space"] = true;
207       Features["16-bit-insts"] = true;
208       Features["dpp"] = true;
209       Features["gfx8-insts"] = true;
210       Features["gfx9-insts"] = true;
211       Features["gfx10-insts"] = true;
212       Features["s-memrealtime"] = true;
213       Features["s-memtime-inst"] = true;
214       break;
215     case GK_GFX90A:
216       Features["gfx90a-insts"] = true;
217       LLVM_FALLTHROUGH;
218     case GK_GFX908:
219       Features["dot3-insts"] = true;
220       Features["dot4-insts"] = true;
221       Features["dot5-insts"] = true;
222       Features["dot6-insts"] = true;
223       Features["mai-insts"] = true;
224       LLVM_FALLTHROUGH;
225     case GK_GFX906:
226       Features["dl-insts"] = true;
227       Features["dot1-insts"] = true;
228       Features["dot2-insts"] = true;
229       LLVM_FALLTHROUGH;
230     case GK_GFX90C:
231     case GK_GFX909:
232     case GK_GFX904:
233     case GK_GFX902:
234     case GK_GFX900:
235       Features["gfx9-insts"] = true;
236       LLVM_FALLTHROUGH;
237     case GK_GFX810:
238     case GK_GFX805:
239     case GK_GFX803:
240     case GK_GFX802:
241     case GK_GFX801:
242       Features["gfx8-insts"] = true;
243       Features["16-bit-insts"] = true;
244       Features["dpp"] = true;
245       Features["s-memrealtime"] = true;
246       LLVM_FALLTHROUGH;
247     case GK_GFX705:
248     case GK_GFX704:
249     case GK_GFX703:
250     case GK_GFX702:
251     case GK_GFX701:
252     case GK_GFX700:
253       Features["ci-insts"] = true;
254       Features["flat-address-space"] = true;
255       LLVM_FALLTHROUGH;
256     case GK_GFX602:
257     case GK_GFX601:
258     case GK_GFX600:
259       Features["s-memtime-inst"] = true;
260       break;
261     case GK_NONE:
262       break;
263     default:
264       llvm_unreachable("Unhandled GPU!");
265     }
266   } else {
267     if (CPU.empty())
268       CPU = "r600";
269 
270     switch (llvm::AMDGPU::parseArchR600(CPU)) {
271     case GK_CAYMAN:
272     case GK_CYPRESS:
273     case GK_RV770:
274     case GK_RV670:
275       // TODO: Add fp64 when implemented.
276       break;
277     case GK_TURKS:
278     case GK_CAICOS:
279     case GK_BARTS:
280     case GK_SUMO:
281     case GK_REDWOOD:
282     case GK_JUNIPER:
283     case GK_CEDAR:
284     case GK_RV730:
285     case GK_RV710:
286     case GK_RS880:
287     case GK_R630:
288     case GK_R600:
289       break;
290     default:
291       llvm_unreachable("Unhandled GPU!");
292     }
293   }
294 
295   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
296 }
297 
298 void AMDGPUTargetInfo::fillValidCPUList(
299     SmallVectorImpl<StringRef> &Values) const {
300   if (isAMDGCN(getTriple()))
301     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
302   else
303     llvm::AMDGPU::fillValidArchListR600(Values);
304 }
305 
306 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
307   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
308 }
309 
310 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
311                                    const TargetOptions &Opts)
312     : TargetInfo(Triple),
313       GPUKind(isAMDGCN(Triple) ?
314               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
315               llvm::AMDGPU::parseArchR600(Opts.CPU)),
316       GPUFeatures(isAMDGCN(Triple) ?
317                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
318                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
319   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
320                                         : DataLayoutStringR600);
321   assert(DataLayout->getAllocaAddrSpace() == Private);
322   GridValues = llvm::omp::AMDGPUGpuGridValues;
323 
324   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
325                      !isAMDGCN(Triple));
326   UseAddrSpaceMapMangling = true;
327 
328   HasLegalHalfType = true;
329   HasFloat16 = true;
330   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
331   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
332 
333   // Set pointer width and alignment for target address space 0.
334   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
335   if (getMaxPointerWidth() == 64) {
336     LongWidth = LongAlign = 64;
337     SizeType = UnsignedLong;
338     PtrDiffType = SignedLong;
339     IntPtrType = SignedLong;
340   }
341 
342   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
343 }
344 
345 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
346   TargetInfo::adjust(Opts);
347   // ToDo: There are still a few places using default address space as private
348   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
349   // can be removed from the following line.
350   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
351                      !isAMDGCN(getTriple()));
352 }
353 
354 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
355   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
356                                              Builtin::FirstTSBuiltin);
357 }
358 
359 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
360                                         MacroBuilder &Builder) const {
361   Builder.defineMacro("__AMD__");
362   Builder.defineMacro("__AMDGPU__");
363 
364   if (isAMDGCN(getTriple()))
365     Builder.defineMacro("__AMDGCN__");
366   else
367     Builder.defineMacro("__R600__");
368 
369   if (GPUKind != llvm::AMDGPU::GK_NONE) {
370     StringRef CanonName = isAMDGCN(getTriple()) ?
371       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
372     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
373     if (isAMDGCN(getTriple())) {
374       Builder.defineMacro("__amdgcn_processor__",
375                           Twine("\"") + Twine(CanonName) + Twine("\""));
376       Builder.defineMacro("__amdgcn_target_id__",
377                           Twine("\"") + Twine(getTargetID().getValue()) +
378                               Twine("\""));
379       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
380         auto Loc = OffloadArchFeatures.find(F);
381         if (Loc != OffloadArchFeatures.end()) {
382           std::string NewF = F.str();
383           std::replace(NewF.begin(), NewF.end(), '-', '_');
384           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
385                                   Twine("__"),
386                               Loc->second ? "1" : "0");
387         }
388       }
389     }
390   }
391 
392   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
393   // removed in the near future.
394   if (hasFMAF())
395     Builder.defineMacro("__HAS_FMAF__");
396   if (hasFastFMAF())
397     Builder.defineMacro("FP_FAST_FMAF");
398   if (hasLDEXPF())
399     Builder.defineMacro("__HAS_LDEXPF__");
400   if (hasFP64())
401     Builder.defineMacro("__HAS_FP64__");
402   if (hasFastFMA())
403     Builder.defineMacro("FP_FAST_FMA");
404 
405   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
406 }
407 
408 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
409   assert(HalfFormat == Aux->HalfFormat);
410   assert(FloatFormat == Aux->FloatFormat);
411   assert(DoubleFormat == Aux->DoubleFormat);
412 
413   // On x86_64 long double is 80-bit extended precision format, which is
414   // not supported by AMDGPU. 128-bit floating point format is also not
415   // supported by AMDGPU. Therefore keep its own format for these two types.
416   auto SaveLongDoubleFormat = LongDoubleFormat;
417   auto SaveFloat128Format = Float128Format;
418   copyAuxTarget(Aux);
419   LongDoubleFormat = SaveLongDoubleFormat;
420   Float128Format = SaveFloat128Format;
421   // For certain builtin types support on the host target, claim they are
422   // support to pass the compilation of the host code during the device-side
423   // compilation.
424   // FIXME: As the side effect, we also accept `__float128` uses in the device
425   // code. To rejct these builtin types supported in the host target but not in
426   // the device target, one approach would support `device_builtin` attribute
427   // so that we could tell the device builtin types from the host ones. The
428   // also solves the different representations of the same builtin type, such
429   // as `size_t` in the MSVC environment.
430   if (Aux->hasFloat128Type()) {
431     HasFloat128 = true;
432     Float128Format = DoubleFormat;
433   }
434 }
435