1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
30 static const char *const DataLayoutStringR600 =
31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
33 
/// LLVM data layout description that the AMDGPUTargetInfo constructor installs
/// when the target triple is amdgcn. The constructor asserts that the alloca
/// address space this string selects equals Private.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
38 
39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40     Generic,  // Default
41     Global,   // opencl_global
42     Local,    // opencl_local
43     Constant, // opencl_constant
44     Private,  // opencl_private
45     Generic,  // opencl_generic
46     Global,   // cuda_device
47     Constant, // cuda_constant
48     Local     // cuda_shared
49 };
50 
51 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
52     Private,  // Default
53     Global,   // opencl_global
54     Local,    // opencl_local
55     Constant, // opencl_constant
56     Private,  // opencl_private
57     Generic,  // opencl_generic
58     Global,   // cuda_device
59     Constant, // cuda_constant
60     Local     // cuda_shared
61 };
62 } // namespace targets
63 } // namespace clang
64 
65 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
66 #define BUILTIN(ID, TYPE, ATTRS)                                               \
67   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
68 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
69   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
70 #include "clang/Basic/BuiltinsAMDGPU.def"
71 };
72 
73 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
74   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
75   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
76   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
77   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
78   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
79   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
80   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
81   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
82   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
83   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
84   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
85   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
86   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
87   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
88   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
89   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
90   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
91   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
92   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
93   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
94   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
95   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
96   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
97   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
98   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
99   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
100   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
101   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
102   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
103   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
104   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
105   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
106   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
107   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
108   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
109   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
110   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
111   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
112   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
113   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
114   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
115   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
116   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
117   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
118   "flat_scratch_lo", "flat_scratch_hi"
119 };
120 
121 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
122   return llvm::makeArrayRef(GCCRegNames);
123 }
124 
125 bool AMDGPUTargetInfo::initFeatureMap(
126     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
127     const std::vector<std::string> &FeatureVec) const {
128 
129   using namespace llvm::AMDGPU;
130 
131   // XXX - What does the member GPU mean if device name string passed here?
132   if (isAMDGCN(getTriple())) {
133     if (CPU.empty())
134       CPU = "gfx600";
135 
136     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
137     case GK_GFX906:
138       Features["dl-insts"] = true;
139       Features["dot-insts"] = true;
140       LLVM_FALLTHROUGH;
141     case GK_GFX909:
142     case GK_GFX904:
143     case GK_GFX902:
144     case GK_GFX900:
145       Features["gfx9-insts"] = true;
146       LLVM_FALLTHROUGH;
147     case GK_GFX810:
148     case GK_GFX803:
149     case GK_GFX802:
150     case GK_GFX801:
151       Features["vi-insts"] = true;
152       Features["16-bit-insts"] = true;
153       Features["dpp"] = true;
154       Features["s-memrealtime"] = true;
155       LLVM_FALLTHROUGH;
156     case GK_GFX704:
157     case GK_GFX703:
158     case GK_GFX702:
159     case GK_GFX701:
160     case GK_GFX700:
161       Features["ci-insts"] = true;
162       LLVM_FALLTHROUGH;
163     case GK_GFX601:
164     case GK_GFX600:
165       break;
166     case GK_NONE:
167       return false;
168     default:
169       llvm_unreachable("Unhandled GPU!");
170     }
171   } else {
172     if (CPU.empty())
173       CPU = "r600";
174 
175     switch (llvm::AMDGPU::parseArchR600(CPU)) {
176     case GK_CAYMAN:
177     case GK_CYPRESS:
178     case GK_RV770:
179     case GK_RV670:
180       // TODO: Add fp64 when implemented.
181       break;
182     case GK_TURKS:
183     case GK_CAICOS:
184     case GK_BARTS:
185     case GK_SUMO:
186     case GK_REDWOOD:
187     case GK_JUNIPER:
188     case GK_CEDAR:
189     case GK_RV730:
190     case GK_RV710:
191     case GK_RS880:
192     case GK_R630:
193     case GK_R600:
194       break;
195     default:
196       llvm_unreachable("Unhandled GPU!");
197     }
198   }
199 
200   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
201 }
202 
203 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
204                                            TargetOptions &TargetOpts) const {
205   bool hasFP32Denormals = false;
206   bool hasFP64Denormals = false;
207 
208   for (auto &I : TargetOpts.FeaturesAsWritten) {
209     if (I == "+fp32-denormals" || I == "-fp32-denormals")
210       hasFP32Denormals = true;
211     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
212       hasFP64Denormals = true;
213   }
214   if (!hasFP32Denormals)
215     TargetOpts.Features.push_back(
216       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
217              ? '+' : '-') + Twine("fp32-denormals"))
218             .str());
219   // Always do not flush fp64 or fp16 denorms.
220   if (!hasFP64Denormals && hasFP64())
221     TargetOpts.Features.push_back("+fp64-fp16-denormals");
222 }
223 
224 void AMDGPUTargetInfo::fillValidCPUList(
225     SmallVectorImpl<StringRef> &Values) const {
226   if (isAMDGCN(getTriple()))
227     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
228   else
229     llvm::AMDGPU::fillValidArchListR600(Values);
230 }
231 
232 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
233   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
234 }
235 
236 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
237                                    const TargetOptions &Opts)
238     : TargetInfo(Triple),
239       GPUKind(isAMDGCN(Triple) ?
240               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
241               llvm::AMDGPU::parseArchR600(Opts.CPU)),
242       GPUFeatures(isAMDGCN(Triple) ?
243                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
244                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
245   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
246                                         : DataLayoutStringR600);
247   assert(DataLayout->getAllocaAddrSpace() == Private);
248 
249   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
250                      !isAMDGCN(Triple));
251   UseAddrSpaceMapMangling = true;
252 
253   // Set pointer width and alignment for target address space 0.
254   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
255   if (getMaxPointerWidth() == 64) {
256     LongWidth = LongAlign = 64;
257     SizeType = UnsignedLong;
258     PtrDiffType = SignedLong;
259     IntPtrType = SignedLong;
260   }
261 
262   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
263 }
264 
265 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
266   TargetInfo::adjust(Opts);
267   // ToDo: There are still a few places using default address space as private
268   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
269   // can be removed from the following line.
270   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
271                      !isAMDGCN(getTriple()));
272 }
273 
274 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
275   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
276                                              Builtin::FirstTSBuiltin);
277 }
278 
279 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
280                                         MacroBuilder &Builder) const {
281   Builder.defineMacro("__AMD__");
282   Builder.defineMacro("__AMDGPU__");
283 
284   if (isAMDGCN(getTriple()))
285     Builder.defineMacro("__AMDGCN__");
286   else
287     Builder.defineMacro("__R600__");
288 
289   if (GPUKind != llvm::AMDGPU::GK_NONE) {
290     StringRef CanonName = isAMDGCN(getTriple()) ?
291       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
292     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
293   }
294 
295   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
296   // removed in the near future.
297   if (hasFMAF())
298     Builder.defineMacro("__HAS_FMAF__");
299   if (hasFastFMAF())
300     Builder.defineMacro("FP_FAST_FMAF");
301   if (hasLDEXPF())
302     Builder.defineMacro("__HAS_LDEXPF__");
303   if (hasFP64())
304     Builder.defineMacro("__HAS_FP64__");
305   if (hasFastFMA())
306     Builder.defineMacro("FP_FAST_FMA");
307 }
308 
309 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
310   assert(HalfFormat == Aux->HalfFormat);
311   assert(FloatFormat == Aux->FloatFormat);
312   assert(DoubleFormat == Aux->DoubleFormat);
313 
314   // On x86_64 long double is 80-bit extended precision format, which is
315   // not supported by AMDGPU. 128-bit floating point format is also not
316   // supported by AMDGPU. Therefore keep its own format for these two types.
317   auto SaveLongDoubleFormat = LongDoubleFormat;
318   auto SaveFloat128Format = Float128Format;
319   copyAuxTarget(Aux);
320   LongDoubleFormat = SaveLongDoubleFormat;
321   Float128Format = SaveFloat128Format;
322 }
323