1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
// LLVM data layout description used for non-amdgcn (R600) triples. The
// literal is split by component group; the pieces concatenate to a single
// '-'-separated string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"                                     // endianness, pointers
    "-i64:64"                                       // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // (continued)
    "-n32:64-S32-A5"; // native int widths, stack align, alloca addr space
33 
// LLVM data layout description used for amdgcn triples. The literal is split
// by component group; the pieces concatenate to a single '-'-separated
// string.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64" // endianness, default pointers
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-AS pointers
    "-i64:64"                                       // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // (continued)
    "-n32:64-S32-A5" // native int widths, stack align, alloca addr space
    "-ni:7";         // non-integral address space
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS)                                               \
68   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
70   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119   "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     if (CPU.empty())
135       CPU = "gfx600";
136 
137     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138     case GK_GFX1012:
139     case GK_GFX1011:
140       Features["dot1-insts"] = true;
141       Features["dot2-insts"] = true;
142       Features["dot5-insts"] = true;
143       Features["dot6-insts"] = true;
144       LLVM_FALLTHROUGH;
145     case GK_GFX1010:
146       Features["dl-insts"] = true;
147       Features["ci-insts"] = true;
148       Features["16-bit-insts"] = true;
149       Features["dpp"] = true;
150       Features["gfx8-insts"] = true;
151       Features["gfx9-insts"] = true;
152       Features["gfx10-insts"] = true;
153       Features["s-memrealtime"] = true;
154       break;
155     case GK_GFX906:
156       Features["dl-insts"] = true;
157       Features["dot1-insts"] = true;
158       Features["dot2-insts"] = true;
159       LLVM_FALLTHROUGH;
160     case GK_GFX909:
161     case GK_GFX904:
162     case GK_GFX902:
163     case GK_GFX900:
164       Features["gfx9-insts"] = true;
165       LLVM_FALLTHROUGH;
166     case GK_GFX810:
167     case GK_GFX803:
168     case GK_GFX802:
169     case GK_GFX801:
170       Features["gfx8-insts"] = true;
171       Features["16-bit-insts"] = true;
172       Features["dpp"] = true;
173       Features["s-memrealtime"] = true;
174       LLVM_FALLTHROUGH;
175     case GK_GFX704:
176     case GK_GFX703:
177     case GK_GFX702:
178     case GK_GFX701:
179     case GK_GFX700:
180       Features["ci-insts"] = true;
181       LLVM_FALLTHROUGH;
182     case GK_GFX601:
183     case GK_GFX600:
184       break;
185     case GK_NONE:
186       return false;
187     default:
188       llvm_unreachable("Unhandled GPU!");
189     }
190   } else {
191     if (CPU.empty())
192       CPU = "r600";
193 
194     switch (llvm::AMDGPU::parseArchR600(CPU)) {
195     case GK_CAYMAN:
196     case GK_CYPRESS:
197     case GK_RV770:
198     case GK_RV670:
199       // TODO: Add fp64 when implemented.
200       break;
201     case GK_TURKS:
202     case GK_CAICOS:
203     case GK_BARTS:
204     case GK_SUMO:
205     case GK_REDWOOD:
206     case GK_JUNIPER:
207     case GK_CEDAR:
208     case GK_RV730:
209     case GK_RV710:
210     case GK_RS880:
211     case GK_R630:
212     case GK_R600:
213       break;
214     default:
215       llvm_unreachable("Unhandled GPU!");
216     }
217   }
218 
219   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
220 }
221 
222 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
223                                            TargetOptions &TargetOpts) const {
224   bool hasFP32Denormals = false;
225   bool hasFP64Denormals = false;
226 
227   for (auto &I : TargetOpts.FeaturesAsWritten) {
228     if (I == "+fp32-denormals" || I == "-fp32-denormals")
229       hasFP32Denormals = true;
230     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
231       hasFP64Denormals = true;
232   }
233   if (!hasFP32Denormals)
234     TargetOpts.Features.push_back(
235       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
236              ? '+' : '-') + Twine("fp32-denormals"))
237             .str());
238   // Always do not flush fp64 or fp16 denorms.
239   if (!hasFP64Denormals && hasFP64())
240     TargetOpts.Features.push_back("+fp64-fp16-denormals");
241 }
242 
243 void AMDGPUTargetInfo::fillValidCPUList(
244     SmallVectorImpl<StringRef> &Values) const {
245   if (isAMDGCN(getTriple()))
246     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
247   else
248     llvm::AMDGPU::fillValidArchListR600(Values);
249 }
250 
251 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
252   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
253 }
254 
255 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
256                                    const TargetOptions &Opts)
257     : TargetInfo(Triple),
258       GPUKind(isAMDGCN(Triple) ?
259               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
260               llvm::AMDGPU::parseArchR600(Opts.CPU)),
261       GPUFeatures(isAMDGCN(Triple) ?
262                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
263                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
264   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
265                                         : DataLayoutStringR600);
266   assert(DataLayout->getAllocaAddrSpace() == Private);
267 
268   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
269                      !isAMDGCN(Triple));
270   UseAddrSpaceMapMangling = true;
271 
272   HasLegalHalfType = true;
273   HasFloat16 = true;
274 
275   // Set pointer width and alignment for target address space 0.
276   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
277   if (getMaxPointerWidth() == 64) {
278     LongWidth = LongAlign = 64;
279     SizeType = UnsignedLong;
280     PtrDiffType = SignedLong;
281     IntPtrType = SignedLong;
282   }
283 
284   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
285 }
286 
287 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
288   TargetInfo::adjust(Opts);
289   // ToDo: There are still a few places using default address space as private
290   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
291   // can be removed from the following line.
292   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
293                      !isAMDGCN(getTriple()));
294 }
295 
296 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
297   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
298                                              Builtin::FirstTSBuiltin);
299 }
300 
301 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
302                                         MacroBuilder &Builder) const {
303   Builder.defineMacro("__AMD__");
304   Builder.defineMacro("__AMDGPU__");
305 
306   if (isAMDGCN(getTriple()))
307     Builder.defineMacro("__AMDGCN__");
308   else
309     Builder.defineMacro("__R600__");
310 
311   if (GPUKind != llvm::AMDGPU::GK_NONE) {
312     StringRef CanonName = isAMDGCN(getTriple()) ?
313       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
314     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
315   }
316 
317   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
318   // removed in the near future.
319   if (hasFMAF())
320     Builder.defineMacro("__HAS_FMAF__");
321   if (hasFastFMAF())
322     Builder.defineMacro("FP_FAST_FMAF");
323   if (hasLDEXPF())
324     Builder.defineMacro("__HAS_LDEXPF__");
325   if (hasFP64())
326     Builder.defineMacro("__HAS_FP64__");
327   if (hasFastFMA())
328     Builder.defineMacro("FP_FAST_FMA");
329 }
330 
331 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
332   assert(HalfFormat == Aux->HalfFormat);
333   assert(FloatFormat == Aux->FloatFormat);
334   assert(DoubleFormat == Aux->DoubleFormat);
335 
336   // On x86_64 long double is 80-bit extended precision format, which is
337   // not supported by AMDGPU. 128-bit floating point format is also not
338   // supported by AMDGPU. Therefore keep its own format for these two types.
339   auto SaveLongDoubleFormat = LongDoubleFormat;
340   auto SaveFloat128Format = Float128Format;
341   copyAuxTarget(Aux);
342   LongDoubleFormat = SaveLongDoubleFormat;
343   Float128Format = SaveFloat128Format;
344 }
345