1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout description handed to resetDataLayout() by the
// AMDGPUTargetInfo constructor when the triple is not amdgcn.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Data layout description handed to resetDataLayout() by the
// AMDGPUTargetInfo constructor when the triple is amdgcn. Unlike the R600
// string it spells out per-address-space pointer entries (p1..p6) and ends
// with a "-ni:7" component.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
    "-ni:7";

// Address space map installed by setAddressSpaceMap(false): the Default
// language address space maps to Generic. Entries are positional; the trailing
// comment on each line names the language address space it stands for.
// NOTE(review): the order presumably has to match the LangAS enumeration
// declared elsewhere -- confirm before adding or reordering entries.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};

// Address space map installed by setAddressSpaceMap(true). Identical to
// AMDGPUDefIsGenMap except for the first entry: the Default language address
// space maps to Private instead of Generic.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
} // namespace targets
} // namespace clang
78 
// Table of AMDGPU builtins, exposed through getTargetBuiltins(). Every
// BUILTIN / TARGET_BUILTIN line pulled in from BuiltinsAMDGPU.def expands to
// one Builtin::Info initializer: the stringized ID, TYPE, ATTRS, nullptr,
// ALL_LANGUAGES, and finally nullptr (BUILTIN) or FEATURE (TARGET_BUILTIN).
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
86 
// Register names exposed through getGCCRegNames(). In order: "v0".."v255",
// "s0".."s127", the named registers "exec", "vcc", "scc", "m0" and
// "flat_scratch" followed by the "_lo"/"_hi" variants of exec, vcc and
// flat_scratch, and finally "a0".."a255".
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1032:
178     case GK_GFX1031:
179     case GK_GFX1030:
180       Features["ci-insts"] = true;
181       Features["dot1-insts"] = true;
182       Features["dot2-insts"] = true;
183       Features["dot5-insts"] = true;
184       Features["dot6-insts"] = true;
185       Features["dl-insts"] = true;
186       Features["flat-address-space"] = true;
187       Features["16-bit-insts"] = true;
188       Features["dpp"] = true;
189       Features["gfx8-insts"] = true;
190       Features["gfx9-insts"] = true;
191       Features["gfx10-insts"] = true;
192       Features["gfx10-3-insts"] = true;
193       Features["s-memrealtime"] = true;
194       break;
195     case GK_GFX1012:
196     case GK_GFX1011:
197       Features["dot1-insts"] = true;
198       Features["dot2-insts"] = true;
199       Features["dot5-insts"] = true;
200       Features["dot6-insts"] = true;
201       LLVM_FALLTHROUGH;
202     case GK_GFX1010:
203       Features["dl-insts"] = true;
204       Features["ci-insts"] = true;
205       Features["flat-address-space"] = true;
206       Features["16-bit-insts"] = true;
207       Features["dpp"] = true;
208       Features["gfx8-insts"] = true;
209       Features["gfx9-insts"] = true;
210       Features["gfx10-insts"] = true;
211       Features["s-memrealtime"] = true;
212       break;
213     case GK_GFX908:
214       Features["dot3-insts"] = true;
215       Features["dot4-insts"] = true;
216       Features["dot5-insts"] = true;
217       Features["dot6-insts"] = true;
218       Features["mai-insts"] = true;
219       LLVM_FALLTHROUGH;
220     case GK_GFX906:
221       Features["dl-insts"] = true;
222       Features["dot1-insts"] = true;
223       Features["dot2-insts"] = true;
224       LLVM_FALLTHROUGH;
225     case GK_GFX909:
226     case GK_GFX904:
227     case GK_GFX902:
228     case GK_GFX900:
229       Features["gfx9-insts"] = true;
230       LLVM_FALLTHROUGH;
231     case GK_GFX810:
232     case GK_GFX805:
233     case GK_GFX803:
234     case GK_GFX802:
235     case GK_GFX801:
236       Features["gfx8-insts"] = true;
237       Features["16-bit-insts"] = true;
238       Features["dpp"] = true;
239       Features["s-memrealtime"] = true;
240       LLVM_FALLTHROUGH;
241     case GK_GFX705:
242     case GK_GFX704:
243     case GK_GFX703:
244     case GK_GFX702:
245     case GK_GFX701:
246     case GK_GFX700:
247       Features["ci-insts"] = true;
248       Features["flat-address-space"] = true;
249       LLVM_FALLTHROUGH;
250     case GK_GFX602:
251     case GK_GFX601:
252     case GK_GFX600:
253       break;
254     case GK_NONE:
255       break;
256     default:
257       llvm_unreachable("Unhandled GPU!");
258     }
259   } else {
260     if (CPU.empty())
261       CPU = "r600";
262 
263     switch (llvm::AMDGPU::parseArchR600(CPU)) {
264     case GK_CAYMAN:
265     case GK_CYPRESS:
266     case GK_RV770:
267     case GK_RV670:
268       // TODO: Add fp64 when implemented.
269       break;
270     case GK_TURKS:
271     case GK_CAICOS:
272     case GK_BARTS:
273     case GK_SUMO:
274     case GK_REDWOOD:
275     case GK_JUNIPER:
276     case GK_CEDAR:
277     case GK_RV730:
278     case GK_RV710:
279     case GK_RS880:
280     case GK_R630:
281     case GK_R600:
282       break;
283     default:
284       llvm_unreachable("Unhandled GPU!");
285     }
286   }
287 
288   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
289 }
290 
291 void AMDGPUTargetInfo::fillValidCPUList(
292     SmallVectorImpl<StringRef> &Values) const {
293   if (isAMDGCN(getTriple()))
294     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
295   else
296     llvm::AMDGPU::fillValidArchListR600(Values);
297 }
298 
299 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
300   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
301 }
302 
303 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
304                                    const TargetOptions &Opts)
305     : TargetInfo(Triple),
306       GPUKind(isAMDGCN(Triple) ?
307               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
308               llvm::AMDGPU::parseArchR600(Opts.CPU)),
309       GPUFeatures(isAMDGCN(Triple) ?
310                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
311                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
312   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
313                                         : DataLayoutStringR600);
314   assert(DataLayout->getAllocaAddrSpace() == Private);
315   GridValues = llvm::omp::AMDGPUGpuGridValues;
316 
317   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
318                      !isAMDGCN(Triple));
319   UseAddrSpaceMapMangling = true;
320 
321   HasLegalHalfType = true;
322   HasFloat16 = true;
323   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
324 
325   // Set pointer width and alignment for target address space 0.
326   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
327   if (getMaxPointerWidth() == 64) {
328     LongWidth = LongAlign = 64;
329     SizeType = UnsignedLong;
330     PtrDiffType = SignedLong;
331     IntPtrType = SignedLong;
332   }
333 
334   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
335 }
336 
337 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
338   TargetInfo::adjust(Opts);
339   // ToDo: There are still a few places using default address space as private
340   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
341   // can be removed from the following line.
342   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
343                      !isAMDGCN(getTriple()));
344 }
345 
346 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
347   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
348                                              Builtin::FirstTSBuiltin);
349 }
350 
351 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
352                                         MacroBuilder &Builder) const {
353   Builder.defineMacro("__AMD__");
354   Builder.defineMacro("__AMDGPU__");
355 
356   if (isAMDGCN(getTriple()))
357     Builder.defineMacro("__AMDGCN__");
358   else
359     Builder.defineMacro("__R600__");
360 
361   if (GPUKind != llvm::AMDGPU::GK_NONE) {
362     StringRef CanonName = isAMDGCN(getTriple()) ?
363       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
364     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
365     if (isAMDGCN(getTriple())) {
366       Builder.defineMacro("__amdgcn_processor__",
367                           Twine("\"") + Twine(CanonName) + Twine("\""));
368       Builder.defineMacro("__amdgcn_target_id__",
369                           Twine("\"") + Twine(getTargetID().getValue()) +
370                               Twine("\""));
371       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
372         auto Loc = OffloadArchFeatures.find(F);
373         if (Loc != OffloadArchFeatures.end()) {
374           std::string NewF = F.str();
375           std::replace(NewF.begin(), NewF.end(), '-', '_');
376           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
377                                   Twine("__"),
378                               Loc->second ? "1" : "0");
379         }
380       }
381     }
382   }
383 
384   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
385   // removed in the near future.
386   if (hasFMAF())
387     Builder.defineMacro("__HAS_FMAF__");
388   if (hasFastFMAF())
389     Builder.defineMacro("FP_FAST_FMAF");
390   if (hasLDEXPF())
391     Builder.defineMacro("__HAS_LDEXPF__");
392   if (hasFP64())
393     Builder.defineMacro("__HAS_FP64__");
394   if (hasFastFMA())
395     Builder.defineMacro("FP_FAST_FMA");
396 
397   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
398 }
399 
400 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
401   assert(HalfFormat == Aux->HalfFormat);
402   assert(FloatFormat == Aux->FloatFormat);
403   assert(DoubleFormat == Aux->DoubleFormat);
404 
405   // On x86_64 long double is 80-bit extended precision format, which is
406   // not supported by AMDGPU. 128-bit floating point format is also not
407   // supported by AMDGPU. Therefore keep its own format for these two types.
408   auto SaveLongDoubleFormat = LongDoubleFormat;
409   auto SaveFloat128Format = Float128Format;
410   copyAuxTarget(Aux);
411   LongDoubleFormat = SaveLongDoubleFormat;
412   Float128Format = SaveFloat128Format;
413   // For certain builtin types support on the host target, claim they are
414   // support to pass the compilation of the host code during the device-side
415   // compilation.
416   // FIXME: As the side effect, we also accept `__float128` uses in the device
417   // code. To rejct these builtin types supported in the host target but not in
418   // the device target, one approach would support `device_builtin` attribute
419   // so that we could tell the device builtin types from the host ones. The
420   // also solves the different representations of the same builtin type, such
421   // as `size_t` in the MSVC environment.
422   if (Aux->hasFloat128Type()) {
423     HasFloat128 = true;
424     Float128Format = DoubleFormat;
425   }
426 }
427