1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
32 static const char *const DataLayoutStringR600 =
33     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
34     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
35 
// Data layout string installed by the AMDGPUTargetInfo constructor for amdgcn
// triples. The literal is split by component kind (default pointer,
// per-address-space pointers, integers, vectors, trailing attributes); the
// concatenated value is what matters. See the LLVM LangRef "Data Layout"
// section for the syntax.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
80 #define BUILTIN(ID, TYPE, ATTRS)                                               \
81   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
83   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
84 #include "clang/Basic/BuiltinsAMDGPU.def"
85 };
86 
87 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
88   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
89   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
90   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
91   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
92   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
93   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
94   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
95   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
96   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
97   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
98   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
99   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
100   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
101   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
102   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
103   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
104   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
105   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
106   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
107   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
108   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
109   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
110   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
111   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
112   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
113   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
114   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
115   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
116   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
117   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
118   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
119   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
120   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
121   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
122   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
123   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
124   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
125   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
126   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
127   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
128   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
129   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
130   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
131   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
132   "flat_scratch_lo", "flat_scratch_hi",
133   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
134   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
135   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
136   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
137   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
138   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
139   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
140   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
141   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
142   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
143   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
144   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
145   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
146   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
147   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
148   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
149   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
150   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
151   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
152   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
153   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
154   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
155   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
156   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
157   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
158   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
159   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
160   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
161   "a252", "a253", "a254", "a255"
162 };
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1033:
178     case GK_GFX1032:
179     case GK_GFX1031:
180     case GK_GFX1030:
181       Features["ci-insts"] = true;
182       Features["dot1-insts"] = true;
183       Features["dot2-insts"] = true;
184       Features["dot5-insts"] = true;
185       Features["dot6-insts"] = true;
186       Features["dl-insts"] = true;
187       Features["flat-address-space"] = true;
188       Features["16-bit-insts"] = true;
189       Features["dpp"] = true;
190       Features["gfx8-insts"] = true;
191       Features["gfx9-insts"] = true;
192       Features["gfx10-insts"] = true;
193       Features["gfx10-3-insts"] = true;
194       Features["s-memrealtime"] = true;
195       break;
196     case GK_GFX1012:
197     case GK_GFX1011:
198       Features["dot1-insts"] = true;
199       Features["dot2-insts"] = true;
200       Features["dot5-insts"] = true;
201       Features["dot6-insts"] = true;
202       LLVM_FALLTHROUGH;
203     case GK_GFX1010:
204       Features["dl-insts"] = true;
205       Features["ci-insts"] = true;
206       Features["flat-address-space"] = true;
207       Features["16-bit-insts"] = true;
208       Features["dpp"] = true;
209       Features["gfx8-insts"] = true;
210       Features["gfx9-insts"] = true;
211       Features["gfx10-insts"] = true;
212       Features["s-memrealtime"] = true;
213       break;
214     case GK_GFX908:
215       Features["dot3-insts"] = true;
216       Features["dot4-insts"] = true;
217       Features["dot5-insts"] = true;
218       Features["dot6-insts"] = true;
219       Features["mai-insts"] = true;
220       LLVM_FALLTHROUGH;
221     case GK_GFX906:
222       Features["dl-insts"] = true;
223       Features["dot1-insts"] = true;
224       Features["dot2-insts"] = true;
225       LLVM_FALLTHROUGH;
226     case GK_GFX90C:
227     case GK_GFX909:
228     case GK_GFX904:
229     case GK_GFX902:
230     case GK_GFX900:
231       Features["gfx9-insts"] = true;
232       LLVM_FALLTHROUGH;
233     case GK_GFX810:
234     case GK_GFX805:
235     case GK_GFX803:
236     case GK_GFX802:
237     case GK_GFX801:
238       Features["gfx8-insts"] = true;
239       Features["16-bit-insts"] = true;
240       Features["dpp"] = true;
241       Features["s-memrealtime"] = true;
242       LLVM_FALLTHROUGH;
243     case GK_GFX705:
244     case GK_GFX704:
245     case GK_GFX703:
246     case GK_GFX702:
247     case GK_GFX701:
248     case GK_GFX700:
249       Features["ci-insts"] = true;
250       Features["flat-address-space"] = true;
251       LLVM_FALLTHROUGH;
252     case GK_GFX602:
253     case GK_GFX601:
254     case GK_GFX600:
255       break;
256     case GK_NONE:
257       break;
258     default:
259       llvm_unreachable("Unhandled GPU!");
260     }
261   } else {
262     if (CPU.empty())
263       CPU = "r600";
264 
265     switch (llvm::AMDGPU::parseArchR600(CPU)) {
266     case GK_CAYMAN:
267     case GK_CYPRESS:
268     case GK_RV770:
269     case GK_RV670:
270       // TODO: Add fp64 when implemented.
271       break;
272     case GK_TURKS:
273     case GK_CAICOS:
274     case GK_BARTS:
275     case GK_SUMO:
276     case GK_REDWOOD:
277     case GK_JUNIPER:
278     case GK_CEDAR:
279     case GK_RV730:
280     case GK_RV710:
281     case GK_RS880:
282     case GK_R630:
283     case GK_R600:
284       break;
285     default:
286       llvm_unreachable("Unhandled GPU!");
287     }
288   }
289 
290   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
291 }
292 
293 void AMDGPUTargetInfo::fillValidCPUList(
294     SmallVectorImpl<StringRef> &Values) const {
295   if (isAMDGCN(getTriple()))
296     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
297   else
298     llvm::AMDGPU::fillValidArchListR600(Values);
299 }
300 
301 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
302   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
303 }
304 
305 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
306                                    const TargetOptions &Opts)
307     : TargetInfo(Triple),
308       GPUKind(isAMDGCN(Triple) ?
309               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
310               llvm::AMDGPU::parseArchR600(Opts.CPU)),
311       GPUFeatures(isAMDGCN(Triple) ?
312                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
313                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
314   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
315                                         : DataLayoutStringR600);
316   assert(DataLayout->getAllocaAddrSpace() == Private);
317   GridValues = llvm::omp::AMDGPUGpuGridValues;
318 
319   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
320                      !isAMDGCN(Triple));
321   UseAddrSpaceMapMangling = true;
322 
323   HasLegalHalfType = true;
324   HasFloat16 = true;
325   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
326 
327   // Set pointer width and alignment for target address space 0.
328   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
329   if (getMaxPointerWidth() == 64) {
330     LongWidth = LongAlign = 64;
331     SizeType = UnsignedLong;
332     PtrDiffType = SignedLong;
333     IntPtrType = SignedLong;
334   }
335 
336   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
337 }
338 
339 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
340   TargetInfo::adjust(Opts);
341   // ToDo: There are still a few places using default address space as private
342   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
343   // can be removed from the following line.
344   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
345                      !isAMDGCN(getTriple()));
346 }
347 
348 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
349   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
350                                              Builtin::FirstTSBuiltin);
351 }
352 
353 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
354                                         MacroBuilder &Builder) const {
355   Builder.defineMacro("__AMD__");
356   Builder.defineMacro("__AMDGPU__");
357 
358   if (isAMDGCN(getTriple()))
359     Builder.defineMacro("__AMDGCN__");
360   else
361     Builder.defineMacro("__R600__");
362 
363   if (GPUKind != llvm::AMDGPU::GK_NONE) {
364     StringRef CanonName = isAMDGCN(getTriple()) ?
365       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
366     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
367     if (isAMDGCN(getTriple())) {
368       Builder.defineMacro("__amdgcn_processor__",
369                           Twine("\"") + Twine(CanonName) + Twine("\""));
370       Builder.defineMacro("__amdgcn_target_id__",
371                           Twine("\"") + Twine(getTargetID().getValue()) +
372                               Twine("\""));
373       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
374         auto Loc = OffloadArchFeatures.find(F);
375         if (Loc != OffloadArchFeatures.end()) {
376           std::string NewF = F.str();
377           std::replace(NewF.begin(), NewF.end(), '-', '_');
378           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
379                                   Twine("__"),
380                               Loc->second ? "1" : "0");
381         }
382       }
383     }
384   }
385 
386   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
387   // removed in the near future.
388   if (hasFMAF())
389     Builder.defineMacro("__HAS_FMAF__");
390   if (hasFastFMAF())
391     Builder.defineMacro("FP_FAST_FMAF");
392   if (hasLDEXPF())
393     Builder.defineMacro("__HAS_LDEXPF__");
394   if (hasFP64())
395     Builder.defineMacro("__HAS_FP64__");
396   if (hasFastFMA())
397     Builder.defineMacro("FP_FAST_FMA");
398 
399   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
400 }
401 
402 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
403   assert(HalfFormat == Aux->HalfFormat);
404   assert(FloatFormat == Aux->FloatFormat);
405   assert(DoubleFormat == Aux->DoubleFormat);
406 
407   // On x86_64 long double is 80-bit extended precision format, which is
408   // not supported by AMDGPU. 128-bit floating point format is also not
409   // supported by AMDGPU. Therefore keep its own format for these two types.
410   auto SaveLongDoubleFormat = LongDoubleFormat;
411   auto SaveFloat128Format = Float128Format;
412   copyAuxTarget(Aux);
413   LongDoubleFormat = SaveLongDoubleFormat;
414   Float128Format = SaveFloat128Format;
415   // For certain builtin types support on the host target, claim they are
416   // support to pass the compilation of the host code during the device-side
417   // compilation.
418   // FIXME: As the side effect, we also accept `__float128` uses in the device
419   // code. To rejct these builtin types supported in the host target but not in
420   // the device target, one approach would support `device_builtin` attribute
421   // so that we could tell the device builtin types from the host ones. The
422   // also solves the different representations of the same builtin type, such
423   // as `size_t` in the MSVC environment.
424   if (Aux->hasFloat128Type()) {
425     HasFloat128 = true;
426     Float128Format = DoubleFormat;
427   }
428 }
429