1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
// LLVM data layout string used for amdgcn triples. The literal is split by
// component purely for readability; the concatenated text is what gets handed
// to resetDataLayout().
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64" // endianness + default pointer spec
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-addrspace
    "-i64:64"                                                // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048" // vector alignments
    "-n32:64-S32-A5"         // native widths, stack align, alloca addrspace
    "-ni:7";                 // non-integral address space
40 
// Address space map selected by setAddressSpaceMap(false). Each entry gives
// the target address space for the language address space named in its
// trailing comment; entries must stay in that order. This table differs from
// AMDGPUDefIsPrivMap only in its first entry: Default maps to Generic.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
55 
// Address space map selected by setAddressSpaceMap(true). Each entry gives
// the target address space for the language address space named in its
// trailing comment; entries must stay in that order. This table differs from
// AMDGPUDefIsGenMap only in its first entry: Default maps to Private.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
71 } // namespace targets
72 } // namespace clang
73 
// Table of AMDGPU builtins, returned by getTargetBuiltins(). The entries are
// produced by expanding the two macros below over BuiltinsAMDGPU.def:
//  - BUILTIN emits an entry whose last field is nullptr.
//  - TARGET_BUILTIN emits the same entry shape with FEATURE as the last field
//    (presumably the target feature the builtin requires -- see the .def).
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
81 
// Register names exposed through getGCCRegNames(). In order, the table lists
// v0-v255, s0-s127, the named registers (exec, vcc, scc, m0, flat_scratch and
// the _lo/_hi halves of exec, vcc and flat_scratch), and finally a0-a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
158 
159 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
160   return llvm::makeArrayRef(GCCRegNames);
161 }
162 
163 bool AMDGPUTargetInfo::initFeatureMap(
164     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
165     const std::vector<std::string> &FeatureVec) const {
166 
167   using namespace llvm::AMDGPU;
168 
169   // XXX - What does the member GPU mean if device name string passed here?
170   if (isAMDGCN(getTriple())) {
171     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
172     case GK_GFX1012:
173     case GK_GFX1011:
174       Features["dot1-insts"] = true;
175       Features["dot2-insts"] = true;
176       Features["dot5-insts"] = true;
177       Features["dot6-insts"] = true;
178       LLVM_FALLTHROUGH;
179     case GK_GFX1010:
180       Features["dl-insts"] = true;
181       Features["ci-insts"] = true;
182       Features["flat-address-space"] = true;
183       Features["16-bit-insts"] = true;
184       Features["dpp"] = true;
185       Features["gfx8-insts"] = true;
186       Features["gfx9-insts"] = true;
187       Features["gfx10-insts"] = true;
188       Features["s-memrealtime"] = true;
189       break;
190     case GK_GFX908:
191       Features["dot3-insts"] = true;
192       Features["dot4-insts"] = true;
193       Features["dot5-insts"] = true;
194       Features["dot6-insts"] = true;
195       Features["mai-insts"] = true;
196       LLVM_FALLTHROUGH;
197     case GK_GFX906:
198       Features["dl-insts"] = true;
199       Features["dot1-insts"] = true;
200       Features["dot2-insts"] = true;
201       LLVM_FALLTHROUGH;
202     case GK_GFX909:
203     case GK_GFX904:
204     case GK_GFX902:
205     case GK_GFX900:
206       Features["gfx9-insts"] = true;
207       LLVM_FALLTHROUGH;
208     case GK_GFX810:
209     case GK_GFX803:
210     case GK_GFX802:
211     case GK_GFX801:
212       Features["gfx8-insts"] = true;
213       Features["16-bit-insts"] = true;
214       Features["dpp"] = true;
215       Features["s-memrealtime"] = true;
216       LLVM_FALLTHROUGH;
217     case GK_GFX704:
218     case GK_GFX703:
219     case GK_GFX702:
220     case GK_GFX701:
221     case GK_GFX700:
222       Features["ci-insts"] = true;
223       Features["flat-address-space"] = true;
224       LLVM_FALLTHROUGH;
225     case GK_GFX601:
226     case GK_GFX600:
227       break;
228     case GK_NONE:
229       break;
230     default:
231       llvm_unreachable("Unhandled GPU!");
232     }
233   } else {
234     if (CPU.empty())
235       CPU = "r600";
236 
237     switch (llvm::AMDGPU::parseArchR600(CPU)) {
238     case GK_CAYMAN:
239     case GK_CYPRESS:
240     case GK_RV770:
241     case GK_RV670:
242       // TODO: Add fp64 when implemented.
243       break;
244     case GK_TURKS:
245     case GK_CAICOS:
246     case GK_BARTS:
247     case GK_SUMO:
248     case GK_REDWOOD:
249     case GK_JUNIPER:
250     case GK_CEDAR:
251     case GK_RV730:
252     case GK_RV710:
253     case GK_RS880:
254     case GK_R630:
255     case GK_R600:
256       break;
257     default:
258       llvm_unreachable("Unhandled GPU!");
259     }
260   }
261 
262   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
263 }
264 
265 void AMDGPUTargetInfo::fillValidCPUList(
266     SmallVectorImpl<StringRef> &Values) const {
267   if (isAMDGCN(getTriple()))
268     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
269   else
270     llvm::AMDGPU::fillValidArchListR600(Values);
271 }
272 
273 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
274   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
275 }
276 
277 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
278                                    const TargetOptions &Opts)
279     : TargetInfo(Triple),
280       GPUKind(isAMDGCN(Triple) ?
281               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
282               llvm::AMDGPU::parseArchR600(Opts.CPU)),
283       GPUFeatures(isAMDGCN(Triple) ?
284                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
285                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
286   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
287                                         : DataLayoutStringR600);
288   assert(DataLayout->getAllocaAddrSpace() == Private);
289 
290   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
291                      !isAMDGCN(Triple));
292   UseAddrSpaceMapMangling = true;
293 
294   HasLegalHalfType = true;
295   HasFloat16 = true;
296 
297   // Set pointer width and alignment for target address space 0.
298   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
299   if (getMaxPointerWidth() == 64) {
300     LongWidth = LongAlign = 64;
301     SizeType = UnsignedLong;
302     PtrDiffType = SignedLong;
303     IntPtrType = SignedLong;
304   }
305 
306   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
307 }
308 
309 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
310   TargetInfo::adjust(Opts);
311   // ToDo: There are still a few places using default address space as private
312   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
313   // can be removed from the following line.
314   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
315                      !isAMDGCN(getTriple()));
316 }
317 
318 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
319   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
320                                              Builtin::FirstTSBuiltin);
321 }
322 
323 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
324                                         MacroBuilder &Builder) const {
325   Builder.defineMacro("__AMD__");
326   Builder.defineMacro("__AMDGPU__");
327 
328   if (isAMDGCN(getTriple()))
329     Builder.defineMacro("__AMDGCN__");
330   else
331     Builder.defineMacro("__R600__");
332 
333   if (GPUKind != llvm::AMDGPU::GK_NONE) {
334     StringRef CanonName = isAMDGCN(getTriple()) ?
335       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
336     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
337   }
338 
339   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
340   // removed in the near future.
341   if (hasFMAF())
342     Builder.defineMacro("__HAS_FMAF__");
343   if (hasFastFMAF())
344     Builder.defineMacro("FP_FAST_FMAF");
345   if (hasLDEXPF())
346     Builder.defineMacro("__HAS_LDEXPF__");
347   if (hasFP64())
348     Builder.defineMacro("__HAS_FP64__");
349   if (hasFastFMA())
350     Builder.defineMacro("FP_FAST_FMA");
351 }
352 
353 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
354   assert(HalfFormat == Aux->HalfFormat);
355   assert(FloatFormat == Aux->FloatFormat);
356   assert(DoubleFormat == Aux->DoubleFormat);
357 
358   // On x86_64 long double is 80-bit extended precision format, which is
359   // not supported by AMDGPU. 128-bit floating point format is also not
360   // supported by AMDGPU. Therefore keep its own format for these two types.
361   auto SaveLongDoubleFormat = LongDoubleFormat;
362   auto SaveFloat128Format = Float128Format;
363   copyAuxTarget(Aux);
364   LongDoubleFormat = SaveLongDoubleFormat;
365   Float128Format = SaveFloat128Format;
366   // For certain builtin types support on the host target, claim they are
367   // support to pass the compilation of the host code during the device-side
368   // compilation.
369   // FIXME: As the side effect, we also accept `__float128` uses in the device
370   // code. To rejct these builtin types supported in the host target but not in
371   // the device target, one approach would support `device_builtin` attribute
372   // so that we could tell the device builtin types from the host ones. The
373   // also solves the different representations of the same builtin type, such
374   // as `size_t` in the MSVC environment.
375   if (Aux->hasFloat128Type()) {
376     HasFloat128 = true;
377     Float128Format = DoubleFormat;
378   }
379 }
380