1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
/// Data layout string handed to resetDataLayout() by the AMDGPUTargetInfo
/// constructor when the triple is not AMDGCN. The pieces below are adjacent
/// string literals, so they concatenate into a single description.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
34 
/// Data layout string handed to resetDataLayout() by the AMDGPUTargetInfo
/// constructor when the triple is AMDGCN. The pieces below are adjacent
/// string literals, so they concatenate into a single description.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-ni:7";
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // cuda_device
49     Constant, // cuda_constant
50     Local,    // cuda_shared
51     Generic,  // ptr32_sptr
52     Generic,  // ptr32_uptr
53     Generic   // ptr64
54 };
55 
56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57     Private,  // Default
58     Global,   // opencl_global
59     Local,    // opencl_local
60     Constant, // opencl_constant
61     Private,  // opencl_private
62     Generic,  // opencl_generic
63     Global,   // cuda_device
64     Constant, // cuda_constant
65     Local,    // cuda_shared
66     Generic,  // ptr32_sptr
67     Generic,  // ptr32_uptr
68     Generic   // ptr64
69 
70 };
71 } // namespace targets
72 } // namespace clang
73 
74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75 #define BUILTIN(ID, TYPE, ATTRS)                                               \
76   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
78   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79 #include "clang/Basic/BuiltinsAMDGPU.def"
80 };
81 
82 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
83   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127   "flat_scratch_lo", "flat_scratch_hi"
128 };
129 
130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131   return llvm::makeArrayRef(GCCRegNames);
132 }
133 
134 bool AMDGPUTargetInfo::initFeatureMap(
135     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136     const std::vector<std::string> &FeatureVec) const {
137 
138   using namespace llvm::AMDGPU;
139 
140   // XXX - What does the member GPU mean if device name string passed here?
141   if (isAMDGCN(getTriple())) {
142     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143     case GK_GFX1012:
144     case GK_GFX1011:
145       Features["dot1-insts"] = true;
146       Features["dot2-insts"] = true;
147       Features["dot5-insts"] = true;
148       Features["dot6-insts"] = true;
149       LLVM_FALLTHROUGH;
150     case GK_GFX1010:
151       Features["dl-insts"] = true;
152       Features["ci-insts"] = true;
153       Features["flat-address-space"] = true;
154       Features["16-bit-insts"] = true;
155       Features["dpp"] = true;
156       Features["gfx8-insts"] = true;
157       Features["gfx9-insts"] = true;
158       Features["gfx10-insts"] = true;
159       Features["s-memrealtime"] = true;
160       break;
161     case GK_GFX908:
162       Features["dot3-insts"] = true;
163       Features["dot4-insts"] = true;
164       Features["dot5-insts"] = true;
165       Features["dot6-insts"] = true;
166       Features["mai-insts"] = true;
167       LLVM_FALLTHROUGH;
168     case GK_GFX906:
169       Features["dl-insts"] = true;
170       Features["dot1-insts"] = true;
171       Features["dot2-insts"] = true;
172       LLVM_FALLTHROUGH;
173     case GK_GFX909:
174     case GK_GFX904:
175     case GK_GFX902:
176     case GK_GFX900:
177       Features["gfx9-insts"] = true;
178       LLVM_FALLTHROUGH;
179     case GK_GFX810:
180     case GK_GFX803:
181     case GK_GFX802:
182     case GK_GFX801:
183       Features["gfx8-insts"] = true;
184       Features["16-bit-insts"] = true;
185       Features["dpp"] = true;
186       Features["s-memrealtime"] = true;
187       LLVM_FALLTHROUGH;
188     case GK_GFX704:
189     case GK_GFX703:
190     case GK_GFX702:
191     case GK_GFX701:
192     case GK_GFX700:
193       Features["ci-insts"] = true;
194       Features["flat-address-space"] = true;
195       LLVM_FALLTHROUGH;
196     case GK_GFX601:
197     case GK_GFX600:
198       break;
199     case GK_NONE:
200       break;
201     default:
202       llvm_unreachable("Unhandled GPU!");
203     }
204   } else {
205     if (CPU.empty())
206       CPU = "r600";
207 
208     switch (llvm::AMDGPU::parseArchR600(CPU)) {
209     case GK_CAYMAN:
210     case GK_CYPRESS:
211     case GK_RV770:
212     case GK_RV670:
213       // TODO: Add fp64 when implemented.
214       break;
215     case GK_TURKS:
216     case GK_CAICOS:
217     case GK_BARTS:
218     case GK_SUMO:
219     case GK_REDWOOD:
220     case GK_JUNIPER:
221     case GK_CEDAR:
222     case GK_RV730:
223     case GK_RV710:
224     case GK_RS880:
225     case GK_R630:
226     case GK_R600:
227       break;
228     default:
229       llvm_unreachable("Unhandled GPU!");
230     }
231   }
232 
233   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
234 }
235 
236 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
237                                            TargetOptions &TargetOpts) const {
238   bool hasFP32Denormals = false;
239   bool hasFP64Denormals = false;
240 
241   for (auto &I : TargetOpts.FeaturesAsWritten) {
242     if (I == "+fp32-denormals" || I == "-fp32-denormals")
243       hasFP32Denormals = true;
244     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
245       hasFP64Denormals = true;
246   }
247   if (!hasFP32Denormals)
248     TargetOpts.Features.push_back(
249       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() &&
250              CGOpts.FP32DenormalMode.Output == llvm::DenormalMode::IEEE
251              ? '+' : '-') + Twine("fp32-denormals"))
252             .str());
253   // Always do not flush fp64 or fp16 denorms.
254   if (!hasFP64Denormals && hasFP64())
255     TargetOpts.Features.push_back("+fp64-fp16-denormals");
256 }
257 
258 void AMDGPUTargetInfo::fillValidCPUList(
259     SmallVectorImpl<StringRef> &Values) const {
260   if (isAMDGCN(getTriple()))
261     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
262   else
263     llvm::AMDGPU::fillValidArchListR600(Values);
264 }
265 
266 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
267   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
268 }
269 
270 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
271                                    const TargetOptions &Opts)
272     : TargetInfo(Triple),
273       GPUKind(isAMDGCN(Triple) ?
274               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
275               llvm::AMDGPU::parseArchR600(Opts.CPU)),
276       GPUFeatures(isAMDGCN(Triple) ?
277                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
278                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
279   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
280                                         : DataLayoutStringR600);
281   assert(DataLayout->getAllocaAddrSpace() == Private);
282 
283   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
284                      !isAMDGCN(Triple));
285   UseAddrSpaceMapMangling = true;
286 
287   HasLegalHalfType = true;
288   HasFloat16 = true;
289 
290   // Set pointer width and alignment for target address space 0.
291   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
292   if (getMaxPointerWidth() == 64) {
293     LongWidth = LongAlign = 64;
294     SizeType = UnsignedLong;
295     PtrDiffType = SignedLong;
296     IntPtrType = SignedLong;
297   }
298 
299   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
300 }
301 
302 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
303   TargetInfo::adjust(Opts);
304   // ToDo: There are still a few places using default address space as private
305   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
306   // can be removed from the following line.
307   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
308                      !isAMDGCN(getTriple()));
309 }
310 
311 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
312   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
313                                              Builtin::FirstTSBuiltin);
314 }
315 
316 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
317                                         MacroBuilder &Builder) const {
318   Builder.defineMacro("__AMD__");
319   Builder.defineMacro("__AMDGPU__");
320 
321   if (isAMDGCN(getTriple()))
322     Builder.defineMacro("__AMDGCN__");
323   else
324     Builder.defineMacro("__R600__");
325 
326   if (GPUKind != llvm::AMDGPU::GK_NONE) {
327     StringRef CanonName = isAMDGCN(getTriple()) ?
328       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
329     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
330   }
331 
332   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
333   // removed in the near future.
334   if (hasFMAF())
335     Builder.defineMacro("__HAS_FMAF__");
336   if (hasFastFMAF())
337     Builder.defineMacro("FP_FAST_FMAF");
338   if (hasLDEXPF())
339     Builder.defineMacro("__HAS_LDEXPF__");
340   if (hasFP64())
341     Builder.defineMacro("__HAS_FP64__");
342   if (hasFastFMA())
343     Builder.defineMacro("FP_FAST_FMA");
344 }
345 
346 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
347   assert(HalfFormat == Aux->HalfFormat);
348   assert(FloatFormat == Aux->FloatFormat);
349   assert(DoubleFormat == Aux->DoubleFormat);
350 
351   // On x86_64 long double is 80-bit extended precision format, which is
352   // not supported by AMDGPU. 128-bit floating point format is also not
353   // supported by AMDGPU. Therefore keep its own format for these two types.
354   auto SaveLongDoubleFormat = LongDoubleFormat;
355   auto SaveFloat128Format = Float128Format;
356   copyAuxTarget(Aux);
357   LongDoubleFormat = SaveLongDoubleFormat;
358   Float128Format = SaveFloat128Format;
359 }
360