1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
30 static const char *const DataLayoutStringR600 =
31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
33 
34 static const char *const DataLayoutStringAMDGCN =
35     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
36     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
37     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
38     "-ni:7";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS)                                               \
68   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
70   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119   "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
135     case GK_GFX1012:
136     case GK_GFX1011:
137       Features["dot1-insts"] = true;
138       Features["dot2-insts"] = true;
139       Features["dot5-insts"] = true;
140       Features["dot6-insts"] = true;
141       LLVM_FALLTHROUGH;
142     case GK_GFX1010:
143       Features["dl-insts"] = true;
144       Features["ci-insts"] = true;
145       Features["16-bit-insts"] = true;
146       Features["dpp"] = true;
147       Features["gfx8-insts"] = true;
148       Features["gfx9-insts"] = true;
149       Features["gfx10-insts"] = true;
150       Features["s-memrealtime"] = true;
151       break;
152     case GK_GFX908:
153       Features["dot3-insts"] = true;
154       Features["dot4-insts"] = true;
155       Features["dot5-insts"] = true;
156       Features["dot6-insts"] = true;
157       LLVM_FALLTHROUGH;
158     case GK_GFX906:
159       Features["dl-insts"] = true;
160       Features["dot1-insts"] = true;
161       Features["dot2-insts"] = true;
162       LLVM_FALLTHROUGH;
163     case GK_GFX909:
164     case GK_GFX904:
165     case GK_GFX902:
166     case GK_GFX900:
167       Features["gfx9-insts"] = true;
168       LLVM_FALLTHROUGH;
169     case GK_GFX810:
170     case GK_GFX803:
171     case GK_GFX802:
172     case GK_GFX801:
173       Features["gfx8-insts"] = true;
174       Features["16-bit-insts"] = true;
175       Features["dpp"] = true;
176       Features["s-memrealtime"] = true;
177       LLVM_FALLTHROUGH;
178     case GK_GFX704:
179     case GK_GFX703:
180     case GK_GFX702:
181     case GK_GFX701:
182     case GK_GFX700:
183       Features["ci-insts"] = true;
184       LLVM_FALLTHROUGH;
185     case GK_GFX601:
186     case GK_GFX600:
187       break;
188     case GK_NONE:
189       break;
190     default:
191       llvm_unreachable("Unhandled GPU!");
192     }
193   } else {
194     if (CPU.empty())
195       CPU = "r600";
196 
197     switch (llvm::AMDGPU::parseArchR600(CPU)) {
198     case GK_CAYMAN:
199     case GK_CYPRESS:
200     case GK_RV770:
201     case GK_RV670:
202       // TODO: Add fp64 when implemented.
203       break;
204     case GK_TURKS:
205     case GK_CAICOS:
206     case GK_BARTS:
207     case GK_SUMO:
208     case GK_REDWOOD:
209     case GK_JUNIPER:
210     case GK_CEDAR:
211     case GK_RV730:
212     case GK_RV710:
213     case GK_RS880:
214     case GK_R630:
215     case GK_R600:
216       break;
217     default:
218       llvm_unreachable("Unhandled GPU!");
219     }
220   }
221 
222   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
223 }
224 
225 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
226                                            TargetOptions &TargetOpts) const {
227   bool hasFP32Denormals = false;
228   bool hasFP64Denormals = false;
229 
230   for (auto &I : TargetOpts.FeaturesAsWritten) {
231     if (I == "+fp32-denormals" || I == "-fp32-denormals")
232       hasFP32Denormals = true;
233     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
234       hasFP64Denormals = true;
235   }
236   if (!hasFP32Denormals)
237     TargetOpts.Features.push_back(
238       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
239              ? '+' : '-') + Twine("fp32-denormals"))
240             .str());
241   // Always do not flush fp64 or fp16 denorms.
242   if (!hasFP64Denormals && hasFP64())
243     TargetOpts.Features.push_back("+fp64-fp16-denormals");
244 }
245 
246 void AMDGPUTargetInfo::fillValidCPUList(
247     SmallVectorImpl<StringRef> &Values) const {
248   if (isAMDGCN(getTriple()))
249     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
250   else
251     llvm::AMDGPU::fillValidArchListR600(Values);
252 }
253 
254 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
255   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
256 }
257 
258 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
259                                    const TargetOptions &Opts)
260     : TargetInfo(Triple),
261       GPUKind(isAMDGCN(Triple) ?
262               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
263               llvm::AMDGPU::parseArchR600(Opts.CPU)),
264       GPUFeatures(isAMDGCN(Triple) ?
265                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
266                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
267   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
268                                         : DataLayoutStringR600);
269   assert(DataLayout->getAllocaAddrSpace() == Private);
270 
271   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
272                      !isAMDGCN(Triple));
273   UseAddrSpaceMapMangling = true;
274 
275   HasLegalHalfType = true;
276   HasFloat16 = true;
277 
278   // Set pointer width and alignment for target address space 0.
279   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
280   if (getMaxPointerWidth() == 64) {
281     LongWidth = LongAlign = 64;
282     SizeType = UnsignedLong;
283     PtrDiffType = SignedLong;
284     IntPtrType = SignedLong;
285   }
286 
287   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
288 }
289 
290 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
291   TargetInfo::adjust(Opts);
292   // ToDo: There are still a few places using default address space as private
293   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
294   // can be removed from the following line.
295   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
296                      !isAMDGCN(getTriple()));
297 }
298 
299 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
300   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
301                                              Builtin::FirstTSBuiltin);
302 }
303 
304 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
305                                         MacroBuilder &Builder) const {
306   Builder.defineMacro("__AMD__");
307   Builder.defineMacro("__AMDGPU__");
308 
309   if (isAMDGCN(getTriple()))
310     Builder.defineMacro("__AMDGCN__");
311   else
312     Builder.defineMacro("__R600__");
313 
314   if (GPUKind != llvm::AMDGPU::GK_NONE) {
315     StringRef CanonName = isAMDGCN(getTriple()) ?
316       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
317     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
318   }
319 
320   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
321   // removed in the near future.
322   if (hasFMAF())
323     Builder.defineMacro("__HAS_FMAF__");
324   if (hasFastFMAF())
325     Builder.defineMacro("FP_FAST_FMAF");
326   if (hasLDEXPF())
327     Builder.defineMacro("__HAS_LDEXPF__");
328   if (hasFP64())
329     Builder.defineMacro("__HAS_FP64__");
330   if (hasFastFMA())
331     Builder.defineMacro("FP_FAST_FMA");
332 }
333 
334 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
335   assert(HalfFormat == Aux->HalfFormat);
336   assert(FloatFormat == Aux->FloatFormat);
337   assert(DoubleFormat == Aux->DoubleFormat);
338 
339   // On x86_64 long double is 80-bit extended precision format, which is
340   // not supported by AMDGPU. 128-bit floating point format is also not
341   // supported by AMDGPU. Therefore keep its own format for these two types.
342   auto SaveLongDoubleFormat = LongDoubleFormat;
343   auto SaveFloat128Format = Float128Format;
344   copyAuxTarget(Aux);
345   LongDoubleFormat = SaveLongDoubleFormat;
346   Float128Format = SaveFloat128Format;
347 }
348