1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21
22 using namespace clang;
23 using namespace clang::targets;
24
25 namespace clang {
26 namespace targets {
27
// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout description for triples that are not AMDGCN. The
// AMDGPUTargetInfo constructor hands it to resetDataLayout() when
// isAMDGCN(triple) is false.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

// Data layout description for AMDGCN triples. The AMDGPUTargetInfo constructor
// hands it to resetDataLayout() when isAMDGCN(triple) is true. Compared with
// the R600 string it additionally lists per-address-space pointer entries
// (p1 through p6) and ends with a "-ni:7" component.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7";
40
// Language-address-space map in which the Default slot resolves to Generic.
// setAddressSpaceMap() installs this table when its DefaultIsPrivate argument
// is false. The table is positional: the trailing comment on each entry names
// the language address space that slot stands for, so entries must not be
// reordered.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Global,   // sycl_global
    Global,   // sycl_global_device
    Global,   // sycl_global_host
    Local,    // sycl_local
    Private,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
62
// Language-address-space map in which the Default slot resolves to Private.
// setAddressSpaceMap() installs this table when its DefaultIsPrivate argument
// is true. The table is positional: the trailing comment on each entry names
// the language address space that slot stands for, so entries must not be
// reordered.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    // SYCL address space values for this map are dummy
    Generic,  // sycl_global
    Generic,  // sycl_global_device
    Generic,  // sycl_global_host
    Generic,  // sycl_local
    Generic,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
86 } // namespace targets
87 } // namespace clang
88
// Table of AMDGPU builtins, produced by expanding the entries of
// BuiltinsAMDGPU.def with the two macros below. getTargetBuiltins() exposes
// this array to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// A plain BUILTIN entry leaves the last initializer (the one TARGET_BUILTIN
// fills with FEATURE) null.
#define BUILTIN(ID, TYPE, ATTRS) \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// A TARGET_BUILTIN entry additionally records FEATURE as the last initializer.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
96
// Register names returned by getGCCRegNames(). The list is laid out in four
// runs: "v0".."v255", then "s0".."s127", then the individually named registers
// (exec, vcc, scc, m0, flat_scratch and their _lo/_hi halves), and finally
// "a0".."a255".
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
    // v0 - v255
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
    "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
    "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
    "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
    "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
    "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
    "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
    "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
    "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
    "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
    "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
    // tail of the v-run, then s0 - s127
    "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
    "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
    "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
    "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
    "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
    "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
    "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
    "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
    "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
    "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
    "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
    "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
    "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
    // tail of the s-run, then the individually named registers
    "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
    "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
    "flat_scratch_lo", "flat_scratch_hi",
    // a0 - a255
    "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
    "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
    "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
    "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
    "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
    "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
    "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
    "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
    "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
    "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
    "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
    "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
    "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
    "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
    "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
    "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
    "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
    "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
    "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
    "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
    "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
    "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
    "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
    "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
    "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
    "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
    "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
    "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
    "a252", "a253", "a254", "a255"
};
173
getGCCRegNames() const174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175 return llvm::makeArrayRef(GCCRegNames);
176 }
177
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const178 bool AMDGPUTargetInfo::initFeatureMap(
179 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180 const std::vector<std::string> &FeatureVec) const {
181
182 using namespace llvm::AMDGPU;
183
184 // XXX - What does the member GPU mean if device name string passed here?
185 if (isAMDGCN(getTriple())) {
186 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
187 case GK_GFX1035:
188 case GK_GFX1034:
189 case GK_GFX1033:
190 case GK_GFX1032:
191 case GK_GFX1031:
192 case GK_GFX1030:
193 Features["ci-insts"] = true;
194 Features["dot1-insts"] = true;
195 Features["dot2-insts"] = true;
196 Features["dot5-insts"] = true;
197 Features["dot6-insts"] = true;
198 Features["dot7-insts"] = true;
199 Features["dl-insts"] = true;
200 Features["flat-address-space"] = true;
201 Features["16-bit-insts"] = true;
202 Features["dpp"] = true;
203 Features["gfx8-insts"] = true;
204 Features["gfx9-insts"] = true;
205 Features["gfx10-insts"] = true;
206 Features["gfx10-3-insts"] = true;
207 Features["s-memrealtime"] = true;
208 Features["s-memtime-inst"] = true;
209 break;
210 case GK_GFX1012:
211 case GK_GFX1011:
212 Features["dot1-insts"] = true;
213 Features["dot2-insts"] = true;
214 Features["dot5-insts"] = true;
215 Features["dot6-insts"] = true;
216 Features["dot7-insts"] = true;
217 LLVM_FALLTHROUGH;
218 case GK_GFX1013:
219 case GK_GFX1010:
220 Features["dl-insts"] = true;
221 Features["ci-insts"] = true;
222 Features["flat-address-space"] = true;
223 Features["16-bit-insts"] = true;
224 Features["dpp"] = true;
225 Features["gfx8-insts"] = true;
226 Features["gfx9-insts"] = true;
227 Features["gfx10-insts"] = true;
228 Features["s-memrealtime"] = true;
229 Features["s-memtime-inst"] = true;
230 break;
231 case GK_GFX90A:
232 Features["gfx90a-insts"] = true;
233 LLVM_FALLTHROUGH;
234 case GK_GFX908:
235 Features["dot3-insts"] = true;
236 Features["dot4-insts"] = true;
237 Features["dot5-insts"] = true;
238 Features["dot6-insts"] = true;
239 Features["mai-insts"] = true;
240 LLVM_FALLTHROUGH;
241 case GK_GFX906:
242 Features["dl-insts"] = true;
243 Features["dot1-insts"] = true;
244 Features["dot2-insts"] = true;
245 Features["dot7-insts"] = true;
246 LLVM_FALLTHROUGH;
247 case GK_GFX90C:
248 case GK_GFX909:
249 case GK_GFX904:
250 case GK_GFX902:
251 case GK_GFX900:
252 Features["gfx9-insts"] = true;
253 LLVM_FALLTHROUGH;
254 case GK_GFX810:
255 case GK_GFX805:
256 case GK_GFX803:
257 case GK_GFX802:
258 case GK_GFX801:
259 Features["gfx8-insts"] = true;
260 Features["16-bit-insts"] = true;
261 Features["dpp"] = true;
262 Features["s-memrealtime"] = true;
263 LLVM_FALLTHROUGH;
264 case GK_GFX705:
265 case GK_GFX704:
266 case GK_GFX703:
267 case GK_GFX702:
268 case GK_GFX701:
269 case GK_GFX700:
270 Features["ci-insts"] = true;
271 Features["flat-address-space"] = true;
272 LLVM_FALLTHROUGH;
273 case GK_GFX602:
274 case GK_GFX601:
275 case GK_GFX600:
276 Features["s-memtime-inst"] = true;
277 break;
278 case GK_NONE:
279 break;
280 default:
281 llvm_unreachable("Unhandled GPU!");
282 }
283 } else {
284 if (CPU.empty())
285 CPU = "r600";
286
287 switch (llvm::AMDGPU::parseArchR600(CPU)) {
288 case GK_CAYMAN:
289 case GK_CYPRESS:
290 case GK_RV770:
291 case GK_RV670:
292 // TODO: Add fp64 when implemented.
293 break;
294 case GK_TURKS:
295 case GK_CAICOS:
296 case GK_BARTS:
297 case GK_SUMO:
298 case GK_REDWOOD:
299 case GK_JUNIPER:
300 case GK_CEDAR:
301 case GK_RV730:
302 case GK_RV710:
303 case GK_RS880:
304 case GK_R630:
305 case GK_R600:
306 break;
307 default:
308 llvm_unreachable("Unhandled GPU!");
309 }
310 }
311
312 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
313 }
314
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const315 void AMDGPUTargetInfo::fillValidCPUList(
316 SmallVectorImpl<StringRef> &Values) const {
317 if (isAMDGCN(getTriple()))
318 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
319 else
320 llvm::AMDGPU::fillValidArchListR600(Values);
321 }
322
setAddressSpaceMap(bool DefaultIsPrivate)323 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
324 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
325 }
326
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)327 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
328 const TargetOptions &Opts)
329 : TargetInfo(Triple),
330 GPUKind(isAMDGCN(Triple) ?
331 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
332 llvm::AMDGPU::parseArchR600(Opts.CPU)),
333 GPUFeatures(isAMDGCN(Triple) ?
334 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
335 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
336 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
337 : DataLayoutStringR600);
338 GridValues = llvm::omp::AMDGPUGpuGridValues;
339
340 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
341 !isAMDGCN(Triple));
342 UseAddrSpaceMapMangling = true;
343
344 HasLegalHalfType = true;
345 HasFloat16 = true;
346 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
347 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
348
349 // Set pointer width and alignment for target address space 0.
350 PointerWidth = PointerAlign = getPointerWidthV(Generic);
351 if (getMaxPointerWidth() == 64) {
352 LongWidth = LongAlign = 64;
353 SizeType = UnsignedLong;
354 PtrDiffType = SignedLong;
355 IntPtrType = SignedLong;
356 }
357
358 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
359 }
360
adjust(DiagnosticsEngine & Diags,LangOptions & Opts)361 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
362 TargetInfo::adjust(Diags, Opts);
363 // ToDo: There are still a few places using default address space as private
364 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
365 // can be removed from the following line.
366 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
367 !isAMDGCN(getTriple()));
368 }
369
getTargetBuiltins() const370 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
371 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
372 Builtin::FirstTSBuiltin);
373 }
374
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const375 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
376 MacroBuilder &Builder) const {
377 Builder.defineMacro("__AMD__");
378 Builder.defineMacro("__AMDGPU__");
379
380 if (isAMDGCN(getTriple()))
381 Builder.defineMacro("__AMDGCN__");
382 else
383 Builder.defineMacro("__R600__");
384
385 if (GPUKind != llvm::AMDGPU::GK_NONE) {
386 StringRef CanonName = isAMDGCN(getTriple()) ?
387 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
388 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
389 if (isAMDGCN(getTriple())) {
390 Builder.defineMacro("__amdgcn_processor__",
391 Twine("\"") + Twine(CanonName) + Twine("\""));
392 Builder.defineMacro("__amdgcn_target_id__",
393 Twine("\"") + Twine(getTargetID().getValue()) +
394 Twine("\""));
395 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
396 auto Loc = OffloadArchFeatures.find(F);
397 if (Loc != OffloadArchFeatures.end()) {
398 std::string NewF = F.str();
399 std::replace(NewF.begin(), NewF.end(), '-', '_');
400 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
401 Twine("__"),
402 Loc->second ? "1" : "0");
403 }
404 }
405 }
406 }
407
408 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
409 // removed in the near future.
410 if (hasFMAF())
411 Builder.defineMacro("__HAS_FMAF__");
412 if (hasFastFMAF())
413 Builder.defineMacro("FP_FAST_FMAF");
414 if (hasLDEXPF())
415 Builder.defineMacro("__HAS_LDEXPF__");
416 if (hasFP64())
417 Builder.defineMacro("__HAS_FP64__");
418 if (hasFastFMA())
419 Builder.defineMacro("FP_FAST_FMA");
420
421 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
422 }
423
setAuxTarget(const TargetInfo * Aux)424 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
425 assert(HalfFormat == Aux->HalfFormat);
426 assert(FloatFormat == Aux->FloatFormat);
427 assert(DoubleFormat == Aux->DoubleFormat);
428
429 // On x86_64 long double is 80-bit extended precision format, which is
430 // not supported by AMDGPU. 128-bit floating point format is also not
431 // supported by AMDGPU. Therefore keep its own format for these two types.
432 auto SaveLongDoubleFormat = LongDoubleFormat;
433 auto SaveFloat128Format = Float128Format;
434 copyAuxTarget(Aux);
435 LongDoubleFormat = SaveLongDoubleFormat;
436 Float128Format = SaveFloat128Format;
437 // For certain builtin types support on the host target, claim they are
438 // support to pass the compilation of the host code during the device-side
439 // compilation.
440 // FIXME: As the side effect, we also accept `__float128` uses in the device
441 // code. To rejct these builtin types supported in the host target but not in
442 // the device target, one approach would support `device_builtin` attribute
443 // so that we could tell the device builtin types from the host ones. The
444 // also solves the different representations of the same builtin type, such
445 // as `size_t` in the MSVC environment.
446 if (Aux->hasFloat128Type()) {
447 HasFloat128 = true;
448 Float128Format = DoubleFormat;
449 }
450 }
451