1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// Data layout description used for non-AMDGCN (R600) triples. The literal is
// split by component group; the pieces concatenate to a single string. The
// constructor reads the pointer size and the alloca address space back out of
// the layout built from this string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"                                // byte order + pointer spec
    "-i64:64"                                  // i64 spec
    "-v16:16-v24:32-v32:32-v48:64-v96:128"     // vector specs (16..96 bits)
    "-v192:256-v256:256-v512:512"              // vector specs (192..512 bits)
    "-v1024:1024-v2048:2048"                   // vector specs (1024/2048 bits)
    "-n32:64-S32-A5";                          // n / S / A (alloca AS) specs
35 
// Data layout description used for AMDGCN triples. The literal is split by
// component group; the pieces concatenate to a single string. Besides the
// default pointer spec it carries one pointer spec per numbered address
// space p1..p6.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                // byte order + default pointers
    "-p1:64:64-p2:32:32-p3:32:32"              // address spaces 1..3
    "-p4:64:64-p5:32:32-p6:32:32"              // address spaces 4..6
    "-i64:64"                                  // i64 spec
    "-v16:16-v24:32-v32:32-v48:64-v96:128"     // vector specs (16..96 bits)
    "-v192:256-v256:256-v512:512"              // vector specs (192..512 bits)
    "-v1024:1024-v2048:2048"                   // vector specs (1024/2048 bits)
    "-n32:64-S32-A5"                           // n / S / A (alloca AS) specs
    "-ni:7";                                   // ni spec
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // cuda_device
50     Constant, // cuda_constant
51     Local,    // cuda_shared
52     Generic,  // ptr32_sptr
53     Generic,  // ptr32_uptr
54     Generic   // ptr64
55 };
56 
57 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
58     Private,  // Default
59     Global,   // opencl_global
60     Local,    // opencl_local
61     Constant, // opencl_constant
62     Private,  // opencl_private
63     Generic,  // opencl_generic
64     Global,   // cuda_device
65     Constant, // cuda_constant
66     Local,    // cuda_shared
67     Generic,  // ptr32_sptr
68     Generic,  // ptr32_uptr
69     Generic   // ptr64
70 
71 };
72 } // namespace targets
73 } // namespace clang
74 
75 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
76 #define BUILTIN(ID, TYPE, ATTRS)                                               \
77   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
78 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
79   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
80 #include "clang/Basic/BuiltinsAMDGPU.def"
81 };
82 
83 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
84   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
85   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
86   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
87   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
88   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
89   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
90   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
91   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
92   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
93   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
94   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
95   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
96   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
97   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
98   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
99   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
100   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
101   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
102   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
103   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
104   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
105   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
106   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
107   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
108   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
109   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
110   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
111   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
112   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
113   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
114   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
115   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
116   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
117   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
118   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
119   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
120   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
121   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
122   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
123   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
124   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
125   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
126   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
127   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
128   "flat_scratch_lo", "flat_scratch_hi",
129   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
130   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
131   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
132   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
133   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
134   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
135   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
136   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
137   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
138   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
139   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
140   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
141   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
142   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
143   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
144   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
145   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
146   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
147   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
148   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
149   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
150   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
151   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
152   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
153   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
154   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
155   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
156   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
157   "a252", "a253", "a254", "a255"
158 };
159 
160 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
161   return llvm::makeArrayRef(GCCRegNames);
162 }
163 
164 bool AMDGPUTargetInfo::initFeatureMap(
165     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
166     const std::vector<std::string> &FeatureVec) const {
167 
168   using namespace llvm::AMDGPU;
169 
170   // XXX - What does the member GPU mean if device name string passed here?
171   if (isAMDGCN(getTriple())) {
172     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
173     case GK_GFX1012:
174     case GK_GFX1011:
175       Features["dot1-insts"] = true;
176       Features["dot2-insts"] = true;
177       Features["dot5-insts"] = true;
178       Features["dot6-insts"] = true;
179       LLVM_FALLTHROUGH;
180     case GK_GFX1010:
181       Features["dl-insts"] = true;
182       Features["ci-insts"] = true;
183       Features["flat-address-space"] = true;
184       Features["16-bit-insts"] = true;
185       Features["dpp"] = true;
186       Features["gfx8-insts"] = true;
187       Features["gfx9-insts"] = true;
188       Features["gfx10-insts"] = true;
189       Features["s-memrealtime"] = true;
190       break;
191     case GK_GFX908:
192       Features["dot3-insts"] = true;
193       Features["dot4-insts"] = true;
194       Features["dot5-insts"] = true;
195       Features["dot6-insts"] = true;
196       Features["mai-insts"] = true;
197       LLVM_FALLTHROUGH;
198     case GK_GFX906:
199       Features["dl-insts"] = true;
200       Features["dot1-insts"] = true;
201       Features["dot2-insts"] = true;
202       LLVM_FALLTHROUGH;
203     case GK_GFX909:
204     case GK_GFX904:
205     case GK_GFX902:
206     case GK_GFX900:
207       Features["gfx9-insts"] = true;
208       LLVM_FALLTHROUGH;
209     case GK_GFX810:
210     case GK_GFX803:
211     case GK_GFX802:
212     case GK_GFX801:
213       Features["gfx8-insts"] = true;
214       Features["16-bit-insts"] = true;
215       Features["dpp"] = true;
216       Features["s-memrealtime"] = true;
217       LLVM_FALLTHROUGH;
218     case GK_GFX704:
219     case GK_GFX703:
220     case GK_GFX702:
221     case GK_GFX701:
222     case GK_GFX700:
223       Features["ci-insts"] = true;
224       Features["flat-address-space"] = true;
225       LLVM_FALLTHROUGH;
226     case GK_GFX601:
227     case GK_GFX600:
228       break;
229     case GK_NONE:
230       break;
231     default:
232       llvm_unreachable("Unhandled GPU!");
233     }
234   } else {
235     if (CPU.empty())
236       CPU = "r600";
237 
238     switch (llvm::AMDGPU::parseArchR600(CPU)) {
239     case GK_CAYMAN:
240     case GK_CYPRESS:
241     case GK_RV770:
242     case GK_RV670:
243       // TODO: Add fp64 when implemented.
244       break;
245     case GK_TURKS:
246     case GK_CAICOS:
247     case GK_BARTS:
248     case GK_SUMO:
249     case GK_REDWOOD:
250     case GK_JUNIPER:
251     case GK_CEDAR:
252     case GK_RV730:
253     case GK_RV710:
254     case GK_RS880:
255     case GK_R630:
256     case GK_R600:
257       break;
258     default:
259       llvm_unreachable("Unhandled GPU!");
260     }
261   }
262 
263   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
264 }
265 
266 void AMDGPUTargetInfo::fillValidCPUList(
267     SmallVectorImpl<StringRef> &Values) const {
268   if (isAMDGCN(getTriple()))
269     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
270   else
271     llvm::AMDGPU::fillValidArchListR600(Values);
272 }
273 
274 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
275   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
276 }
277 
278 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
279                                    const TargetOptions &Opts)
280     : TargetInfo(Triple),
281       GPUKind(isAMDGCN(Triple) ?
282               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
283               llvm::AMDGPU::parseArchR600(Opts.CPU)),
284       GPUFeatures(isAMDGCN(Triple) ?
285                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
286                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
287   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
288                                         : DataLayoutStringR600);
289   assert(DataLayout->getAllocaAddrSpace() == Private);
290   GridValues = llvm::omp::AMDGPUGpuGridValues;
291 
292   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
293                      !isAMDGCN(Triple));
294   UseAddrSpaceMapMangling = true;
295 
296   HasLegalHalfType = true;
297   HasFloat16 = true;
298 
299   // Set pointer width and alignment for target address space 0.
300   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
301   if (getMaxPointerWidth() == 64) {
302     LongWidth = LongAlign = 64;
303     SizeType = UnsignedLong;
304     PtrDiffType = SignedLong;
305     IntPtrType = SignedLong;
306   }
307 
308   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
309 }
310 
311 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
312   TargetInfo::adjust(Opts);
313   // ToDo: There are still a few places using default address space as private
314   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
315   // can be removed from the following line.
316   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
317                      !isAMDGCN(getTriple()));
318 }
319 
320 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
321   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
322                                              Builtin::FirstTSBuiltin);
323 }
324 
325 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
326                                         MacroBuilder &Builder) const {
327   Builder.defineMacro("__AMD__");
328   Builder.defineMacro("__AMDGPU__");
329 
330   if (isAMDGCN(getTriple()))
331     Builder.defineMacro("__AMDGCN__");
332   else
333     Builder.defineMacro("__R600__");
334 
335   if (GPUKind != llvm::AMDGPU::GK_NONE) {
336     StringRef CanonName = isAMDGCN(getTriple()) ?
337       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
338     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
339   }
340 
341   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
342   // removed in the near future.
343   if (hasFMAF())
344     Builder.defineMacro("__HAS_FMAF__");
345   if (hasFastFMAF())
346     Builder.defineMacro("FP_FAST_FMAF");
347   if (hasLDEXPF())
348     Builder.defineMacro("__HAS_LDEXPF__");
349   if (hasFP64())
350     Builder.defineMacro("__HAS_FP64__");
351   if (hasFastFMA())
352     Builder.defineMacro("FP_FAST_FMA");
353 }
354 
355 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
356   assert(HalfFormat == Aux->HalfFormat);
357   assert(FloatFormat == Aux->FloatFormat);
358   assert(DoubleFormat == Aux->DoubleFormat);
359 
360   // On x86_64 long double is 80-bit extended precision format, which is
361   // not supported by AMDGPU. 128-bit floating point format is also not
362   // supported by AMDGPU. Therefore keep its own format for these two types.
363   auto SaveLongDoubleFormat = LongDoubleFormat;
364   auto SaveFloat128Format = Float128Format;
365   copyAuxTarget(Aux);
366   LongDoubleFormat = SaveLongDoubleFormat;
367   Float128Format = SaveFloat128Format;
368   // For certain builtin types support on the host target, claim they are
369   // support to pass the compilation of the host code during the device-side
370   // compilation.
371   // FIXME: As the side effect, we also accept `__float128` uses in the device
372   // code. To rejct these builtin types supported in the host target but not in
373   // the device target, one approach would support `device_builtin` attribute
374   // so that we could tell the device builtin types from the host ones. The
375   // also solves the different representations of the same builtin type, such
376   // as `size_t` in the MSVC environment.
377   if (Aux->hasFloat128Type()) {
378     HasFloat128 = true;
379     Float128Format = DoubleFormat;
380   }
381 }
382