1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// LLVM data layout description used for R600 triples. The pieces are, in
// order: endianness and default pointer layout, i64 alignment, vector
// alignments, then native integer widths, stack alignment and the alloca
// address space.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";

// LLVM data layout description used for AMDGCN triples. Compared with the
// R600 string it uses a 64-bit default pointer, adds explicit pointer layouts
// for address spaces 1-6 and ends with a non-integral address space entry.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5"
    "-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // opencl_global_device
50     Global,   // opencl_global_host
51     Global,   // cuda_device
52     Constant, // cuda_constant
53     Local,    // cuda_shared
54     Generic,  // ptr32_sptr
55     Generic,  // ptr32_uptr
56     Generic   // ptr64
57 };
58 
59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60     Private,  // Default
61     Global,   // opencl_global
62     Local,    // opencl_local
63     Constant, // opencl_constant
64     Private,  // opencl_private
65     Generic,  // opencl_generic
66     Global,   // opencl_global_device
67     Global,   // opencl_global_host
68     Global,   // cuda_device
69     Constant, // cuda_constant
70     Local,    // cuda_shared
71     Generic,  // ptr32_sptr
72     Generic,  // ptr32_uptr
73     Generic   // ptr64
74 
75 };
76 } // namespace targets
77 } // namespace clang
78 
79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
80 #define BUILTIN(ID, TYPE, ATTRS)                                               \
81   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
83   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
84 #include "clang/Basic/BuiltinsAMDGPU.def"
85 };
86 
87 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
88   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
89   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
90   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
91   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
92   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
93   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
94   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
95   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
96   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
97   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
98   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
99   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
100   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
101   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
102   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
103   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
104   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
105   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
106   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
107   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
108   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
109   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
110   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
111   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
112   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
113   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
114   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
115   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
116   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
117   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
118   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
119   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
120   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
121   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
122   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
123   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
124   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
125   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
126   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
127   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
128   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
129   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
130   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
131   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
132   "flat_scratch_lo", "flat_scratch_hi",
133   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
134   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
135   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
136   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
137   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
138   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
139   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
140   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
141   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
142   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
143   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
144   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
145   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
146   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
147   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
148   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
149   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
150   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
151   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
152   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
153   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
154   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
155   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
156   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
157   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
158   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
159   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
160   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
161   "a252", "a253", "a254", "a255"
162 };
163 
164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1031:
178     case GK_GFX1030:
179       Features["ci-insts"] = true;
180       Features["dot1-insts"] = true;
181       Features["dot2-insts"] = true;
182       Features["dot5-insts"] = true;
183       Features["dot6-insts"] = true;
184       Features["dl-insts"] = true;
185       Features["flat-address-space"] = true;
186       Features["16-bit-insts"] = true;
187       Features["dpp"] = true;
188       Features["gfx8-insts"] = true;
189       Features["gfx9-insts"] = true;
190       Features["gfx10-insts"] = true;
191       Features["gfx10-3-insts"] = true;
192       Features["s-memrealtime"] = true;
193       break;
194     case GK_GFX1012:
195     case GK_GFX1011:
196       Features["dot1-insts"] = true;
197       Features["dot2-insts"] = true;
198       Features["dot5-insts"] = true;
199       Features["dot6-insts"] = true;
200       LLVM_FALLTHROUGH;
201     case GK_GFX1010:
202       Features["dl-insts"] = true;
203       Features["ci-insts"] = true;
204       Features["flat-address-space"] = true;
205       Features["16-bit-insts"] = true;
206       Features["dpp"] = true;
207       Features["gfx8-insts"] = true;
208       Features["gfx9-insts"] = true;
209       Features["gfx10-insts"] = true;
210       Features["s-memrealtime"] = true;
211       break;
212     case GK_GFX908:
213       Features["dot3-insts"] = true;
214       Features["dot4-insts"] = true;
215       Features["dot5-insts"] = true;
216       Features["dot6-insts"] = true;
217       Features["mai-insts"] = true;
218       LLVM_FALLTHROUGH;
219     case GK_GFX906:
220       Features["dl-insts"] = true;
221       Features["dot1-insts"] = true;
222       Features["dot2-insts"] = true;
223       LLVM_FALLTHROUGH;
224     case GK_GFX909:
225     case GK_GFX904:
226     case GK_GFX902:
227     case GK_GFX900:
228       Features["gfx9-insts"] = true;
229       LLVM_FALLTHROUGH;
230     case GK_GFX810:
231     case GK_GFX805:
232     case GK_GFX803:
233     case GK_GFX802:
234     case GK_GFX801:
235       Features["gfx8-insts"] = true;
236       Features["16-bit-insts"] = true;
237       Features["dpp"] = true;
238       Features["s-memrealtime"] = true;
239       LLVM_FALLTHROUGH;
240     case GK_GFX705:
241     case GK_GFX704:
242     case GK_GFX703:
243     case GK_GFX702:
244     case GK_GFX701:
245     case GK_GFX700:
246       Features["ci-insts"] = true;
247       Features["flat-address-space"] = true;
248       LLVM_FALLTHROUGH;
249     case GK_GFX602:
250     case GK_GFX601:
251     case GK_GFX600:
252       break;
253     case GK_NONE:
254       break;
255     default:
256       llvm_unreachable("Unhandled GPU!");
257     }
258   } else {
259     if (CPU.empty())
260       CPU = "r600";
261 
262     switch (llvm::AMDGPU::parseArchR600(CPU)) {
263     case GK_CAYMAN:
264     case GK_CYPRESS:
265     case GK_RV770:
266     case GK_RV670:
267       // TODO: Add fp64 when implemented.
268       break;
269     case GK_TURKS:
270     case GK_CAICOS:
271     case GK_BARTS:
272     case GK_SUMO:
273     case GK_REDWOOD:
274     case GK_JUNIPER:
275     case GK_CEDAR:
276     case GK_RV730:
277     case GK_RV710:
278     case GK_RS880:
279     case GK_R630:
280     case GK_R600:
281       break;
282     default:
283       llvm_unreachable("Unhandled GPU!");
284     }
285   }
286 
287   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
288 }
289 
290 void AMDGPUTargetInfo::fillValidCPUList(
291     SmallVectorImpl<StringRef> &Values) const {
292   if (isAMDGCN(getTriple()))
293     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
294   else
295     llvm::AMDGPU::fillValidArchListR600(Values);
296 }
297 
298 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
299   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
300 }
301 
302 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
303                                    const TargetOptions &Opts)
304     : TargetInfo(Triple),
305       GPUKind(isAMDGCN(Triple) ?
306               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
307               llvm::AMDGPU::parseArchR600(Opts.CPU)),
308       GPUFeatures(isAMDGCN(Triple) ?
309                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
310                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
311   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
312                                         : DataLayoutStringR600);
313   assert(DataLayout->getAllocaAddrSpace() == Private);
314   GridValues = llvm::omp::AMDGPUGpuGridValues;
315 
316   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
317                      !isAMDGCN(Triple));
318   UseAddrSpaceMapMangling = true;
319 
320   HasLegalHalfType = true;
321   HasFloat16 = true;
322   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
323 
324   // Set pointer width and alignment for target address space 0.
325   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
326   if (getMaxPointerWidth() == 64) {
327     LongWidth = LongAlign = 64;
328     SizeType = UnsignedLong;
329     PtrDiffType = SignedLong;
330     IntPtrType = SignedLong;
331   }
332 
333   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
334 }
335 
336 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
337   TargetInfo::adjust(Opts);
338   // ToDo: There are still a few places using default address space as private
339   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
340   // can be removed from the following line.
341   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
342                      !isAMDGCN(getTriple()));
343 }
344 
345 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
346   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
347                                              Builtin::FirstTSBuiltin);
348 }
349 
350 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
351                                         MacroBuilder &Builder) const {
352   Builder.defineMacro("__AMD__");
353   Builder.defineMacro("__AMDGPU__");
354 
355   if (isAMDGCN(getTriple()))
356     Builder.defineMacro("__AMDGCN__");
357   else
358     Builder.defineMacro("__R600__");
359 
360   if (GPUKind != llvm::AMDGPU::GK_NONE) {
361     StringRef CanonName = isAMDGCN(getTriple()) ?
362       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
363     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
364     if (isAMDGCN(getTriple())) {
365       Builder.defineMacro("__amdgcn_processor__",
366                           Twine("\"") + Twine(CanonName) + Twine("\""));
367       Builder.defineMacro("__amdgcn_target_id__",
368                           Twine("\"") + Twine(getTargetID().getValue()) +
369                               Twine("\""));
370       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
371         auto Loc = OffloadArchFeatures.find(F);
372         if (Loc != OffloadArchFeatures.end()) {
373           std::string NewF = F.str();
374           std::replace(NewF.begin(), NewF.end(), '-', '_');
375           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
376                                   Twine("__"),
377                               Loc->second ? "1" : "0");
378         }
379       }
380     }
381   }
382 
383   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
384   // removed in the near future.
385   if (hasFMAF())
386     Builder.defineMacro("__HAS_FMAF__");
387   if (hasFastFMAF())
388     Builder.defineMacro("FP_FAST_FMAF");
389   if (hasLDEXPF())
390     Builder.defineMacro("__HAS_LDEXPF__");
391   if (hasFP64())
392     Builder.defineMacro("__HAS_FP64__");
393   if (hasFastFMA())
394     Builder.defineMacro("FP_FAST_FMA");
395 
396   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
397 }
398 
399 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
400   assert(HalfFormat == Aux->HalfFormat);
401   assert(FloatFormat == Aux->FloatFormat);
402   assert(DoubleFormat == Aux->DoubleFormat);
403 
404   // On x86_64 long double is 80-bit extended precision format, which is
405   // not supported by AMDGPU. 128-bit floating point format is also not
406   // supported by AMDGPU. Therefore keep its own format for these two types.
407   auto SaveLongDoubleFormat = LongDoubleFormat;
408   auto SaveFloat128Format = Float128Format;
409   copyAuxTarget(Aux);
410   LongDoubleFormat = SaveLongDoubleFormat;
411   Float128Format = SaveFloat128Format;
412   // For certain builtin types support on the host target, claim they are
413   // support to pass the compilation of the host code during the device-side
414   // compilation.
415   // FIXME: As the side effect, we also accept `__float128` uses in the device
416   // code. To rejct these builtin types supported in the host target but not in
417   // the device target, one approach would support `device_builtin` attribute
418   // so that we could tell the device builtin types from the host ones. The
419   // also solves the different representations of the same builtin type, such
420   // as `size_t` in the MSVC environment.
421   if (Aux->hasFloat128Type()) {
422     HasFloat128 = true;
423     Float128Format = DoubleFormat;
424   }
425 }
426