1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
// LLVM data layout description for amdgcn, written one field group per line
// (field syntax: LLVM LangRef, "Data Layout").
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                              // default pointer
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-AS pointers
    "-i64:64"                                                // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128"                   // vector alignments
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"      // wide vectors
    "-n32:64-S32-A5" // native ints, stack alignment, alloca address space
    "-ni:7";         // non-integral pointer address space
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // cuda_device
49     Constant, // cuda_constant
50     Local,    // cuda_shared
51     Generic,  // ptr32_sptr
52     Generic,  // ptr32_uptr
53     Generic   // ptr64
54 };
55 
56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57     Private,  // Default
58     Global,   // opencl_global
59     Local,    // opencl_local
60     Constant, // opencl_constant
61     Private,  // opencl_private
62     Generic,  // opencl_generic
63     Global,   // cuda_device
64     Constant, // cuda_constant
65     Local,    // cuda_shared
66     Generic,  // ptr32_sptr
67     Generic,  // ptr32_uptr
68     Generic   // ptr64
69 
70 };
71 } // namespace targets
72 } // namespace clang
73 
// Table of AMDGPU target builtins. Each entry of BuiltinsAMDGPU.def is
// expanded through one of the two macros below into a Builtin::Info
// initializer; getTargetBuiltins() exposes the resulting array.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// BUILTIN entries stringify ID for the first field and leave both the fourth
// and the final field null.
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// TARGET_BUILTIN entries are identical except that FEATURE is stored in the
// final field.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
81 
// Register names exposed through getGCCRegNames(): the "v" registers v0-v255,
// then the "s" registers s0-s127, then the named registers exec, vcc, scc, m0
// and flat_scratch together with the _lo/_hi halves of exec, vcc and
// flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
129 
130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131   return llvm::makeArrayRef(GCCRegNames);
132 }
133 
134 bool AMDGPUTargetInfo::initFeatureMap(
135     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136     const std::vector<std::string> &FeatureVec) const {
137 
138   using namespace llvm::AMDGPU;
139 
140   // XXX - What does the member GPU mean if device name string passed here?
141   if (isAMDGCN(getTriple())) {
142     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143     case GK_GFX1012:
144     case GK_GFX1011:
145       Features["dot1-insts"] = true;
146       Features["dot2-insts"] = true;
147       Features["dot5-insts"] = true;
148       Features["dot6-insts"] = true;
149       LLVM_FALLTHROUGH;
150     case GK_GFX1010:
151       Features["dl-insts"] = true;
152       Features["ci-insts"] = true;
153       Features["flat-address-space"] = true;
154       Features["16-bit-insts"] = true;
155       Features["dpp"] = true;
156       Features["gfx8-insts"] = true;
157       Features["gfx9-insts"] = true;
158       Features["gfx10-insts"] = true;
159       Features["s-memrealtime"] = true;
160       break;
161     case GK_GFX908:
162       Features["dot3-insts"] = true;
163       Features["dot4-insts"] = true;
164       Features["dot5-insts"] = true;
165       Features["dot6-insts"] = true;
166       LLVM_FALLTHROUGH;
167     case GK_GFX906:
168       Features["dl-insts"] = true;
169       Features["dot1-insts"] = true;
170       Features["dot2-insts"] = true;
171       LLVM_FALLTHROUGH;
172     case GK_GFX909:
173     case GK_GFX904:
174     case GK_GFX902:
175     case GK_GFX900:
176       Features["gfx9-insts"] = true;
177       LLVM_FALLTHROUGH;
178     case GK_GFX810:
179     case GK_GFX803:
180     case GK_GFX802:
181     case GK_GFX801:
182       Features["gfx8-insts"] = true;
183       Features["16-bit-insts"] = true;
184       Features["dpp"] = true;
185       Features["s-memrealtime"] = true;
186       LLVM_FALLTHROUGH;
187     case GK_GFX704:
188     case GK_GFX703:
189     case GK_GFX702:
190     case GK_GFX701:
191     case GK_GFX700:
192       Features["ci-insts"] = true;
193       Features["flat-address-space"] = true;
194       LLVM_FALLTHROUGH;
195     case GK_GFX601:
196     case GK_GFX600:
197       break;
198     case GK_NONE:
199       break;
200     default:
201       llvm_unreachable("Unhandled GPU!");
202     }
203   } else {
204     if (CPU.empty())
205       CPU = "r600";
206 
207     switch (llvm::AMDGPU::parseArchR600(CPU)) {
208     case GK_CAYMAN:
209     case GK_CYPRESS:
210     case GK_RV770:
211     case GK_RV670:
212       // TODO: Add fp64 when implemented.
213       break;
214     case GK_TURKS:
215     case GK_CAICOS:
216     case GK_BARTS:
217     case GK_SUMO:
218     case GK_REDWOOD:
219     case GK_JUNIPER:
220     case GK_CEDAR:
221     case GK_RV730:
222     case GK_RV710:
223     case GK_RS880:
224     case GK_R630:
225     case GK_R600:
226       break;
227     default:
228       llvm_unreachable("Unhandled GPU!");
229     }
230   }
231 
232   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
233 }
234 
235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
236                                            TargetOptions &TargetOpts) const {
237   bool hasFP32Denormals = false;
238   bool hasFP64Denormals = false;
239 
240   for (auto &I : TargetOpts.FeaturesAsWritten) {
241     if (I == "+fp32-denormals" || I == "-fp32-denormals")
242       hasFP32Denormals = true;
243     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244       hasFP64Denormals = true;
245   }
246   if (!hasFP32Denormals)
247     TargetOpts.Features.push_back(
248       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() &&
249              CGOpts.FP32DenormalMode == llvm::DenormalMode::IEEE
250              ? '+' : '-') + Twine("fp32-denormals"))
251             .str());
252   // Always do not flush fp64 or fp16 denorms.
253   if (!hasFP64Denormals && hasFP64())
254     TargetOpts.Features.push_back("+fp64-fp16-denormals");
255 }
256 
257 void AMDGPUTargetInfo::fillValidCPUList(
258     SmallVectorImpl<StringRef> &Values) const {
259   if (isAMDGCN(getTriple()))
260     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
261   else
262     llvm::AMDGPU::fillValidArchListR600(Values);
263 }
264 
265 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
266   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
267 }
268 
269 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
270                                    const TargetOptions &Opts)
271     : TargetInfo(Triple),
272       GPUKind(isAMDGCN(Triple) ?
273               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
274               llvm::AMDGPU::parseArchR600(Opts.CPU)),
275       GPUFeatures(isAMDGCN(Triple) ?
276                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
277                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
278   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
279                                         : DataLayoutStringR600);
280   assert(DataLayout->getAllocaAddrSpace() == Private);
281 
282   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
283                      !isAMDGCN(Triple));
284   UseAddrSpaceMapMangling = true;
285 
286   HasLegalHalfType = true;
287   HasFloat16 = true;
288 
289   // Set pointer width and alignment for target address space 0.
290   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
291   if (getMaxPointerWidth() == 64) {
292     LongWidth = LongAlign = 64;
293     SizeType = UnsignedLong;
294     PtrDiffType = SignedLong;
295     IntPtrType = SignedLong;
296   }
297 
298   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
299 }
300 
301 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
302   TargetInfo::adjust(Opts);
303   // ToDo: There are still a few places using default address space as private
304   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
305   // can be removed from the following line.
306   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
307                      !isAMDGCN(getTriple()));
308 }
309 
310 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
311   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
312                                              Builtin::FirstTSBuiltin);
313 }
314 
315 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
316                                         MacroBuilder &Builder) const {
317   Builder.defineMacro("__AMD__");
318   Builder.defineMacro("__AMDGPU__");
319 
320   if (isAMDGCN(getTriple()))
321     Builder.defineMacro("__AMDGCN__");
322   else
323     Builder.defineMacro("__R600__");
324 
325   if (GPUKind != llvm::AMDGPU::GK_NONE) {
326     StringRef CanonName = isAMDGCN(getTriple()) ?
327       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
328     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
329   }
330 
331   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
332   // removed in the near future.
333   if (hasFMAF())
334     Builder.defineMacro("__HAS_FMAF__");
335   if (hasFastFMAF())
336     Builder.defineMacro("FP_FAST_FMAF");
337   if (hasLDEXPF())
338     Builder.defineMacro("__HAS_LDEXPF__");
339   if (hasFP64())
340     Builder.defineMacro("__HAS_FP64__");
341   if (hasFastFMA())
342     Builder.defineMacro("FP_FAST_FMA");
343 }
344 
345 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
346   assert(HalfFormat == Aux->HalfFormat);
347   assert(FloatFormat == Aux->FloatFormat);
348   assert(DoubleFormat == Aux->DoubleFormat);
349 
350   // On x86_64 long double is 80-bit extended precision format, which is
351   // not supported by AMDGPU. 128-bit floating point format is also not
352   // supported by AMDGPU. Therefore keep its own format for these two types.
353   auto SaveLongDoubleFormat = LongDoubleFormat;
354   auto SaveFloat128Format = Float128Format;
355   copyAuxTarget(Aux);
356   LongDoubleFormat = SaveLongDoubleFormat;
357   Float128Format = SaveFloat128Format;
358 }
359