1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// LLVM data layout description for the non-amdgcn (r600) triple. The
// AMDGPUTargetInfo constructor passes it to resetDataLayout(). It is spelled
// as one literal per component group; adjacent literals concatenate, so the
// resulting string is what matters, not the line breaks.
static const char *const DataLayoutStringR600 =
    // Little-endian, 32-bit pointers.
    "e-p:32:32"
    // Integer alignment.
    "-i64:64"
    // Vector alignments.
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    // Native integer widths, stack alignment, alloca and globals address
    // spaces (see the LLVM LangRef "Data Layout" section for the syntax).
    "-n32:64-S32-A5-G1";
35 
// LLVM data layout description for the amdgcn triple. The AMDGPUTargetInfo
// constructor passes it to resetDataLayout(). It is spelled as one literal per
// component group; adjacent literals concatenate, so the resulting string is
// what matters, not the line breaks.
static const char *const DataLayoutStringAMDGCN =
    // Little-endian, 64-bit pointers in the default address space.
    "e-p:64:64"
    // Pointer size/alignment for address spaces 1 through 6.
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    // Integer alignment.
    "-i64:64"
    // Vector alignments.
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    // Native integer widths, stack alignment, alloca and globals address
    // spaces (see the LLVM LangRef "Data Layout" section for the syntax).
    "-n32:64-S32-A5-G1"
    // Address space 7 holds non-integral pointers.
    "-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
80 #define BUILTIN(ID, TYPE, ATTRS)                                               \
81   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
83   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
84 #include "clang/Basic/BuiltinsAMDGPU.def"
85 };
86 
87 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
88   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
89   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
90   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
91   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
92   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
93   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
94   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
95   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
96   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
97   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
98   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
99   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
100   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
101   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
102   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
103   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
104   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
105   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
106   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
107   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
108   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
109   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
110   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
111   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
112   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
113   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
114   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
115   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
116   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
117   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
118   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
119   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
120   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
121   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
122   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
123   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
124   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
125   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
126   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
127   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
128   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
129   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
130   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
131   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
132   "flat_scratch_lo", "flat_scratch_hi",
133   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
134   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
135   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
136   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
137   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
138   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
139   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
140   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
141   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
142   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
143   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
144   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
145   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
146   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
147   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
148   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
149   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
150   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
151   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
152   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
153   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
154   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
155   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
156   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
157   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
158   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
159   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
160   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
161   "a252", "a253", "a254", "a255"
162 };
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1033:
178     case GK_GFX1032:
179     case GK_GFX1031:
180     case GK_GFX1030:
181       Features["ci-insts"] = true;
182       Features["dot1-insts"] = true;
183       Features["dot2-insts"] = true;
184       Features["dot5-insts"] = true;
185       Features["dot6-insts"] = true;
186       Features["dot7-insts"] = true;
187       Features["dl-insts"] = true;
188       Features["flat-address-space"] = true;
189       Features["16-bit-insts"] = true;
190       Features["dpp"] = true;
191       Features["gfx8-insts"] = true;
192       Features["gfx9-insts"] = true;
193       Features["gfx10-insts"] = true;
194       Features["gfx10-3-insts"] = true;
195       Features["s-memrealtime"] = true;
196       Features["s-memtime-inst"] = true;
197       break;
198     case GK_GFX1012:
199     case GK_GFX1011:
200       Features["dot1-insts"] = true;
201       Features["dot2-insts"] = true;
202       Features["dot5-insts"] = true;
203       Features["dot6-insts"] = true;
204       Features["dot7-insts"] = true;
205       LLVM_FALLTHROUGH;
206     case GK_GFX1010:
207       Features["dl-insts"] = true;
208       Features["ci-insts"] = true;
209       Features["flat-address-space"] = true;
210       Features["16-bit-insts"] = true;
211       Features["dpp"] = true;
212       Features["gfx8-insts"] = true;
213       Features["gfx9-insts"] = true;
214       Features["gfx10-insts"] = true;
215       Features["s-memrealtime"] = true;
216       Features["s-memtime-inst"] = true;
217       break;
218     case GK_GFX90A:
219       Features["gfx90a-insts"] = true;
220       LLVM_FALLTHROUGH;
221     case GK_GFX908:
222       Features["dot3-insts"] = true;
223       Features["dot4-insts"] = true;
224       Features["dot5-insts"] = true;
225       Features["dot6-insts"] = true;
226       Features["mai-insts"] = true;
227       LLVM_FALLTHROUGH;
228     case GK_GFX906:
229       Features["dl-insts"] = true;
230       Features["dot1-insts"] = true;
231       Features["dot2-insts"] = true;
232       Features["dot7-insts"] = true;
233       LLVM_FALLTHROUGH;
234     case GK_GFX90C:
235     case GK_GFX909:
236     case GK_GFX904:
237     case GK_GFX902:
238     case GK_GFX900:
239       Features["gfx9-insts"] = true;
240       LLVM_FALLTHROUGH;
241     case GK_GFX810:
242     case GK_GFX805:
243     case GK_GFX803:
244     case GK_GFX802:
245     case GK_GFX801:
246       Features["gfx8-insts"] = true;
247       Features["16-bit-insts"] = true;
248       Features["dpp"] = true;
249       Features["s-memrealtime"] = true;
250       LLVM_FALLTHROUGH;
251     case GK_GFX705:
252     case GK_GFX704:
253     case GK_GFX703:
254     case GK_GFX702:
255     case GK_GFX701:
256     case GK_GFX700:
257       Features["ci-insts"] = true;
258       Features["flat-address-space"] = true;
259       LLVM_FALLTHROUGH;
260     case GK_GFX602:
261     case GK_GFX601:
262     case GK_GFX600:
263       Features["s-memtime-inst"] = true;
264       break;
265     case GK_NONE:
266       break;
267     default:
268       llvm_unreachable("Unhandled GPU!");
269     }
270   } else {
271     if (CPU.empty())
272       CPU = "r600";
273 
274     switch (llvm::AMDGPU::parseArchR600(CPU)) {
275     case GK_CAYMAN:
276     case GK_CYPRESS:
277     case GK_RV770:
278     case GK_RV670:
279       // TODO: Add fp64 when implemented.
280       break;
281     case GK_TURKS:
282     case GK_CAICOS:
283     case GK_BARTS:
284     case GK_SUMO:
285     case GK_REDWOOD:
286     case GK_JUNIPER:
287     case GK_CEDAR:
288     case GK_RV730:
289     case GK_RV710:
290     case GK_RS880:
291     case GK_R630:
292     case GK_R600:
293       break;
294     default:
295       llvm_unreachable("Unhandled GPU!");
296     }
297   }
298 
299   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
300 }
301 
302 void AMDGPUTargetInfo::fillValidCPUList(
303     SmallVectorImpl<StringRef> &Values) const {
304   if (isAMDGCN(getTriple()))
305     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
306   else
307     llvm::AMDGPU::fillValidArchListR600(Values);
308 }
309 
310 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
311   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
312 }
313 
314 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
315                                    const TargetOptions &Opts)
316     : TargetInfo(Triple),
317       GPUKind(isAMDGCN(Triple) ?
318               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
319               llvm::AMDGPU::parseArchR600(Opts.CPU)),
320       GPUFeatures(isAMDGCN(Triple) ?
321                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
322                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
323   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
324                                         : DataLayoutStringR600);
325   assert(DataLayout->getAllocaAddrSpace() == Private);
326   GridValues = llvm::omp::AMDGPUGpuGridValues;
327 
328   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
329                      !isAMDGCN(Triple));
330   UseAddrSpaceMapMangling = true;
331 
332   HasLegalHalfType = true;
333   HasFloat16 = true;
334   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
335   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
336 
337   // Set pointer width and alignment for target address space 0.
338   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
339   if (getMaxPointerWidth() == 64) {
340     LongWidth = LongAlign = 64;
341     SizeType = UnsignedLong;
342     PtrDiffType = SignedLong;
343     IntPtrType = SignedLong;
344   }
345 
346   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
347 }
348 
349 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
350   TargetInfo::adjust(Opts);
351   // ToDo: There are still a few places using default address space as private
352   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
353   // can be removed from the following line.
354   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
355                      !isAMDGCN(getTriple()));
356 }
357 
358 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
359   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
360                                              Builtin::FirstTSBuiltin);
361 }
362 
363 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
364                                         MacroBuilder &Builder) const {
365   Builder.defineMacro("__AMD__");
366   Builder.defineMacro("__AMDGPU__");
367 
368   if (isAMDGCN(getTriple()))
369     Builder.defineMacro("__AMDGCN__");
370   else
371     Builder.defineMacro("__R600__");
372 
373   if (GPUKind != llvm::AMDGPU::GK_NONE) {
374     StringRef CanonName = isAMDGCN(getTriple()) ?
375       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
376     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
377     if (isAMDGCN(getTriple())) {
378       Builder.defineMacro("__amdgcn_processor__",
379                           Twine("\"") + Twine(CanonName) + Twine("\""));
380       Builder.defineMacro("__amdgcn_target_id__",
381                           Twine("\"") + Twine(getTargetID().getValue()) +
382                               Twine("\""));
383       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
384         auto Loc = OffloadArchFeatures.find(F);
385         if (Loc != OffloadArchFeatures.end()) {
386           std::string NewF = F.str();
387           std::replace(NewF.begin(), NewF.end(), '-', '_');
388           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
389                                   Twine("__"),
390                               Loc->second ? "1" : "0");
391         }
392       }
393     }
394   }
395 
396   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
397   // removed in the near future.
398   if (hasFMAF())
399     Builder.defineMacro("__HAS_FMAF__");
400   if (hasFastFMAF())
401     Builder.defineMacro("FP_FAST_FMAF");
402   if (hasLDEXPF())
403     Builder.defineMacro("__HAS_LDEXPF__");
404   if (hasFP64())
405     Builder.defineMacro("__HAS_FP64__");
406   if (hasFastFMA())
407     Builder.defineMacro("FP_FAST_FMA");
408 
409   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
410 }
411 
412 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
413   assert(HalfFormat == Aux->HalfFormat);
414   assert(FloatFormat == Aux->FloatFormat);
415   assert(DoubleFormat == Aux->DoubleFormat);
416 
417   // On x86_64 long double is 80-bit extended precision format, which is
418   // not supported by AMDGPU. 128-bit floating point format is also not
419   // supported by AMDGPU. Therefore keep its own format for these two types.
420   auto SaveLongDoubleFormat = LongDoubleFormat;
421   auto SaveFloat128Format = Float128Format;
422   copyAuxTarget(Aux);
423   LongDoubleFormat = SaveLongDoubleFormat;
424   Float128Format = SaveFloat128Format;
425   // For certain builtin types support on the host target, claim they are
426   // support to pass the compilation of the host code during the device-side
427   // compilation.
428   // FIXME: As the side effect, we also accept `__float128` uses in the device
429   // code. To rejct these builtin types supported in the host target but not in
430   // the device target, one approach would support `device_builtin` attribute
431   // so that we could tell the device builtin types from the host ones. The
432   // also solves the different representations of the same builtin type, such
433   // as `size_t` in the MSVC environment.
434   if (Aux->hasFloat128Type()) {
435     HasFloat128 = true;
436     Float128Format = DoubleFormat;
437   }
438 }
439