1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// Data layout used for R600 targets: the AMDGPUTargetInfo constructor installs
// it whenever the triple is not amdgcn. The adjacent literals below are
// concatenated by the compiler into one LLVM data layout string.
static const char *const DataLayoutStringR600 =
    // Endianness and the default pointer.
    "e-p:32:32"
    // Integer alignment.
    "-i64:64"
    // Vector alignments.
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    // Native integer widths, stack alignment, alloca and global address spaces.
    "-n32:64-S32-A5-G1";
35 
// Data layout used for amdgcn targets: the AMDGPUTargetInfo constructor
// installs it whenever the triple is amdgcn. The adjacent literals below are
// concatenated by the compiler into one LLVM data layout string.
static const char *const DataLayoutStringAMDGCN =
    // Endianness and the default pointer.
    "e-p:64:64"
    // Pointers in the numbered address spaces 1 through 6.
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    // Integer alignment.
    "-i64:64"
    // Vector alignments.
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    // Native integer widths, stack alignment, alloca and global address
    // spaces, and the non-integral address space.
    "-n32:64-S32-A5-G1-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
// Table of AMDGPU builtin descriptors, produced by expanding every entry of
// BuiltinsAMDGPU.def through the two macros defined below. BUILTIN entries
// leave the final field null, whereas TARGET_BUILTIN entries store their
// FEATURE argument there. getTargetBuiltins() hands this array to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
86 
// Register names handed out by getGCCRegNames(). The table lists v0-v255,
// then s0-s127, then the named registers exec, vcc, scc, m0 and flat_scratch
// followed by the _lo/_hi names of exec, vcc and flat_scratch, and finally
// a0-a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1033:
178     case GK_GFX1032:
179     case GK_GFX1031:
180     case GK_GFX1030:
181       Features["ci-insts"] = true;
182       Features["dot1-insts"] = true;
183       Features["dot2-insts"] = true;
184       Features["dot5-insts"] = true;
185       Features["dot6-insts"] = true;
186       Features["dl-insts"] = true;
187       Features["flat-address-space"] = true;
188       Features["16-bit-insts"] = true;
189       Features["dpp"] = true;
190       Features["gfx8-insts"] = true;
191       Features["gfx9-insts"] = true;
192       Features["gfx10-insts"] = true;
193       Features["gfx10-3-insts"] = true;
194       Features["s-memrealtime"] = true;
195       Features["s-memtime-inst"] = true;
196       break;
197     case GK_GFX1012:
198     case GK_GFX1011:
199       Features["dot1-insts"] = true;
200       Features["dot2-insts"] = true;
201       Features["dot5-insts"] = true;
202       Features["dot6-insts"] = true;
203       LLVM_FALLTHROUGH;
204     case GK_GFX1010:
205       Features["dl-insts"] = true;
206       Features["ci-insts"] = true;
207       Features["flat-address-space"] = true;
208       Features["16-bit-insts"] = true;
209       Features["dpp"] = true;
210       Features["gfx8-insts"] = true;
211       Features["gfx9-insts"] = true;
212       Features["gfx10-insts"] = true;
213       Features["s-memrealtime"] = true;
214       Features["s-memtime-inst"] = true;
215       break;
216     case GK_GFX90A:
217       Features["gfx90a-insts"] = true;
218       LLVM_FALLTHROUGH;
219     case GK_GFX908:
220       Features["dot3-insts"] = true;
221       Features["dot4-insts"] = true;
222       Features["dot5-insts"] = true;
223       Features["dot6-insts"] = true;
224       Features["mai-insts"] = true;
225       LLVM_FALLTHROUGH;
226     case GK_GFX906:
227       Features["dl-insts"] = true;
228       Features["dot1-insts"] = true;
229       Features["dot2-insts"] = true;
230       LLVM_FALLTHROUGH;
231     case GK_GFX90C:
232     case GK_GFX909:
233     case GK_GFX904:
234     case GK_GFX902:
235     case GK_GFX900:
236       Features["gfx9-insts"] = true;
237       LLVM_FALLTHROUGH;
238     case GK_GFX810:
239     case GK_GFX805:
240     case GK_GFX803:
241     case GK_GFX802:
242     case GK_GFX801:
243       Features["gfx8-insts"] = true;
244       Features["16-bit-insts"] = true;
245       Features["dpp"] = true;
246       Features["s-memrealtime"] = true;
247       LLVM_FALLTHROUGH;
248     case GK_GFX705:
249     case GK_GFX704:
250     case GK_GFX703:
251     case GK_GFX702:
252     case GK_GFX701:
253     case GK_GFX700:
254       Features["ci-insts"] = true;
255       Features["flat-address-space"] = true;
256       LLVM_FALLTHROUGH;
257     case GK_GFX602:
258     case GK_GFX601:
259     case GK_GFX600:
260       Features["s-memtime-inst"] = true;
261       break;
262     case GK_NONE:
263       break;
264     default:
265       llvm_unreachable("Unhandled GPU!");
266     }
267   } else {
268     if (CPU.empty())
269       CPU = "r600";
270 
271     switch (llvm::AMDGPU::parseArchR600(CPU)) {
272     case GK_CAYMAN:
273     case GK_CYPRESS:
274     case GK_RV770:
275     case GK_RV670:
276       // TODO: Add fp64 when implemented.
277       break;
278     case GK_TURKS:
279     case GK_CAICOS:
280     case GK_BARTS:
281     case GK_SUMO:
282     case GK_REDWOOD:
283     case GK_JUNIPER:
284     case GK_CEDAR:
285     case GK_RV730:
286     case GK_RV710:
287     case GK_RS880:
288     case GK_R630:
289     case GK_R600:
290       break;
291     default:
292       llvm_unreachable("Unhandled GPU!");
293     }
294   }
295 
296   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
297 }
298 
299 void AMDGPUTargetInfo::fillValidCPUList(
300     SmallVectorImpl<StringRef> &Values) const {
301   if (isAMDGCN(getTriple()))
302     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
303   else
304     llvm::AMDGPU::fillValidArchListR600(Values);
305 }
306 
307 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
308   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
309 }
310 
311 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
312                                    const TargetOptions &Opts)
313     : TargetInfo(Triple),
314       GPUKind(isAMDGCN(Triple) ?
315               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
316               llvm::AMDGPU::parseArchR600(Opts.CPU)),
317       GPUFeatures(isAMDGCN(Triple) ?
318                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
319                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
320   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
321                                         : DataLayoutStringR600);
322   assert(DataLayout->getAllocaAddrSpace() == Private);
323   GridValues = llvm::omp::AMDGPUGpuGridValues;
324 
325   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
326                      !isAMDGCN(Triple));
327   UseAddrSpaceMapMangling = true;
328 
329   HasLegalHalfType = true;
330   HasFloat16 = true;
331   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
332   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
333 
334   // Set pointer width and alignment for target address space 0.
335   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
336   if (getMaxPointerWidth() == 64) {
337     LongWidth = LongAlign = 64;
338     SizeType = UnsignedLong;
339     PtrDiffType = SignedLong;
340     IntPtrType = SignedLong;
341   }
342 
343   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
344 }
345 
346 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
347   TargetInfo::adjust(Opts);
348   // ToDo: There are still a few places using default address space as private
349   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
350   // can be removed from the following line.
351   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
352                      !isAMDGCN(getTriple()));
353 }
354 
355 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
356   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
357                                              Builtin::FirstTSBuiltin);
358 }
359 
360 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
361                                         MacroBuilder &Builder) const {
362   Builder.defineMacro("__AMD__");
363   Builder.defineMacro("__AMDGPU__");
364 
365   if (isAMDGCN(getTriple()))
366     Builder.defineMacro("__AMDGCN__");
367   else
368     Builder.defineMacro("__R600__");
369 
370   if (GPUKind != llvm::AMDGPU::GK_NONE) {
371     StringRef CanonName = isAMDGCN(getTriple()) ?
372       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
373     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
374     if (isAMDGCN(getTriple())) {
375       Builder.defineMacro("__amdgcn_processor__",
376                           Twine("\"") + Twine(CanonName) + Twine("\""));
377       Builder.defineMacro("__amdgcn_target_id__",
378                           Twine("\"") + Twine(getTargetID().getValue()) +
379                               Twine("\""));
380       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
381         auto Loc = OffloadArchFeatures.find(F);
382         if (Loc != OffloadArchFeatures.end()) {
383           std::string NewF = F.str();
384           std::replace(NewF.begin(), NewF.end(), '-', '_');
385           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
386                                   Twine("__"),
387                               Loc->second ? "1" : "0");
388         }
389       }
390     }
391   }
392 
393   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
394   // removed in the near future.
395   if (hasFMAF())
396     Builder.defineMacro("__HAS_FMAF__");
397   if (hasFastFMAF())
398     Builder.defineMacro("FP_FAST_FMAF");
399   if (hasLDEXPF())
400     Builder.defineMacro("__HAS_LDEXPF__");
401   if (hasFP64())
402     Builder.defineMacro("__HAS_FP64__");
403   if (hasFastFMA())
404     Builder.defineMacro("FP_FAST_FMA");
405 
406   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
407 }
408 
409 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
410   assert(HalfFormat == Aux->HalfFormat);
411   assert(FloatFormat == Aux->FloatFormat);
412   assert(DoubleFormat == Aux->DoubleFormat);
413 
414   // On x86_64 long double is 80-bit extended precision format, which is
415   // not supported by AMDGPU. 128-bit floating point format is also not
416   // supported by AMDGPU. Therefore keep its own format for these two types.
417   auto SaveLongDoubleFormat = LongDoubleFormat;
418   auto SaveFloat128Format = Float128Format;
419   copyAuxTarget(Aux);
420   LongDoubleFormat = SaveLongDoubleFormat;
421   Float128Format = SaveFloat128Format;
422   // For certain builtin types support on the host target, claim they are
423   // support to pass the compilation of the host code during the device-side
424   // compilation.
425   // FIXME: As the side effect, we also accept `__float128` uses in the device
426   // code. To rejct these builtin types supported in the host target but not in
427   // the device target, one approach would support `device_builtin` attribute
428   // so that we could tell the device builtin types from the host ones. The
429   // also solves the different representations of the same builtin type, such
430   // as `size_t` in the MSVC environment.
431   if (Aux->hasFloat128Type()) {
432     HasFloat128 = true;
433     Float128Format = DoubleFormat;
434   }
435 }
436