1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
30 static const char *const DataLayoutStringR600 =
31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
33 
/// Data layout description selected by the AMDGPUTargetInfo constructor for
/// amdgcn triples. The literal is split per specification group for
/// readability only; the concatenated text is what resetDataLayout() sees.
/// See the LLVM LangRef "Data Layout" section for the meaning of each field.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
38 
39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40     Generic,  // Default
41     Global,   // opencl_global
42     Local,    // opencl_local
43     Constant, // opencl_constant
44     Private,  // opencl_private
45     Generic,  // opencl_generic
46     Global,   // cuda_device
47     Constant, // cuda_constant
48     Local     // cuda_shared
49 };
50 
51 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
52     Private,  // Default
53     Global,   // opencl_global
54     Local,    // opencl_local
55     Constant, // opencl_constant
56     Private,  // opencl_private
57     Generic,  // opencl_generic
58     Global,   // cuda_device
59     Constant, // cuda_constant
60     Local     // cuda_shared
61 };
62 } // namespace targets
63 } // namespace clang
64 
65 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
66 #define BUILTIN(ID, TYPE, ATTRS)                                               \
67   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
68 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
69   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
70 #include "clang/Basic/BuiltinsAMDGPU.def"
71 };
72 
73 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
74   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
75   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
76   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
77   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
78   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
79   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
80   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
81   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
82   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
83   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
84   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
85   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
86   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
87   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
88   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
89   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
90   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
91   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
92   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
93   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
94   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
95   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
96   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
97   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
98   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
99   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
100   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
101   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
102   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
103   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
104   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
105   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
106   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
107   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
108   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
109   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
110   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
111   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
112   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
113   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
114   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
115   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
116   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
117   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
118   "flat_scratch_lo", "flat_scratch_hi"
119 };
120 
121 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
122   return llvm::makeArrayRef(GCCRegNames);
123 }
124 
125 bool AMDGPUTargetInfo::initFeatureMap(
126     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
127     const std::vector<std::string> &FeatureVec) const {
128 
129   using namespace llvm::AMDGPU;
130 
131   // XXX - What does the member GPU mean if device name string passed here?
132   if (isAMDGCN(getTriple())) {
133     if (CPU.empty())
134       CPU = "gfx600";
135 
136     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
137     case GK_GFX906:
138       Features["dl-insts"] = true;
139       Features["dot1-insts"] = true;
140       Features["dot2-insts"] = true;
141       LLVM_FALLTHROUGH;
142     case GK_GFX909:
143     case GK_GFX904:
144     case GK_GFX902:
145     case GK_GFX900:
146       Features["gfx9-insts"] = true;
147       LLVM_FALLTHROUGH;
148     case GK_GFX810:
149     case GK_GFX803:
150     case GK_GFX802:
151     case GK_GFX801:
152       Features["vi-insts"] = true;
153       Features["16-bit-insts"] = true;
154       Features["dpp"] = true;
155       Features["s-memrealtime"] = true;
156       LLVM_FALLTHROUGH;
157     case GK_GFX704:
158     case GK_GFX703:
159     case GK_GFX702:
160     case GK_GFX701:
161     case GK_GFX700:
162       Features["ci-insts"] = true;
163       LLVM_FALLTHROUGH;
164     case GK_GFX601:
165     case GK_GFX600:
166       break;
167     case GK_NONE:
168       return false;
169     default:
170       llvm_unreachable("Unhandled GPU!");
171     }
172   } else {
173     if (CPU.empty())
174       CPU = "r600";
175 
176     switch (llvm::AMDGPU::parseArchR600(CPU)) {
177     case GK_CAYMAN:
178     case GK_CYPRESS:
179     case GK_RV770:
180     case GK_RV670:
181       // TODO: Add fp64 when implemented.
182       break;
183     case GK_TURKS:
184     case GK_CAICOS:
185     case GK_BARTS:
186     case GK_SUMO:
187     case GK_REDWOOD:
188     case GK_JUNIPER:
189     case GK_CEDAR:
190     case GK_RV730:
191     case GK_RV710:
192     case GK_RS880:
193     case GK_R630:
194     case GK_R600:
195       break;
196     default:
197       llvm_unreachable("Unhandled GPU!");
198     }
199   }
200 
201   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
202 }
203 
204 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
205                                            TargetOptions &TargetOpts) const {
206   bool hasFP32Denormals = false;
207   bool hasFP64Denormals = false;
208 
209   for (auto &I : TargetOpts.FeaturesAsWritten) {
210     if (I == "+fp32-denormals" || I == "-fp32-denormals")
211       hasFP32Denormals = true;
212     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
213       hasFP64Denormals = true;
214   }
215   if (!hasFP32Denormals)
216     TargetOpts.Features.push_back(
217       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
218              ? '+' : '-') + Twine("fp32-denormals"))
219             .str());
220   // Always do not flush fp64 or fp16 denorms.
221   if (!hasFP64Denormals && hasFP64())
222     TargetOpts.Features.push_back("+fp64-fp16-denormals");
223 }
224 
225 void AMDGPUTargetInfo::fillValidCPUList(
226     SmallVectorImpl<StringRef> &Values) const {
227   if (isAMDGCN(getTriple()))
228     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
229   else
230     llvm::AMDGPU::fillValidArchListR600(Values);
231 }
232 
233 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
234   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
235 }
236 
237 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
238                                    const TargetOptions &Opts)
239     : TargetInfo(Triple),
240       GPUKind(isAMDGCN(Triple) ?
241               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
242               llvm::AMDGPU::parseArchR600(Opts.CPU)),
243       GPUFeatures(isAMDGCN(Triple) ?
244                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
245                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
246   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
247                                         : DataLayoutStringR600);
248   assert(DataLayout->getAllocaAddrSpace() == Private);
249 
250   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
251                      !isAMDGCN(Triple));
252   UseAddrSpaceMapMangling = true;
253 
254   // Set pointer width and alignment for target address space 0.
255   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
256   if (getMaxPointerWidth() == 64) {
257     LongWidth = LongAlign = 64;
258     SizeType = UnsignedLong;
259     PtrDiffType = SignedLong;
260     IntPtrType = SignedLong;
261   }
262 
263   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
264 }
265 
266 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
267   TargetInfo::adjust(Opts);
268   // ToDo: There are still a few places using default address space as private
269   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
270   // can be removed from the following line.
271   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
272                      !isAMDGCN(getTriple()));
273 }
274 
275 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
276   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
277                                              Builtin::FirstTSBuiltin);
278 }
279 
280 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
281                                         MacroBuilder &Builder) const {
282   Builder.defineMacro("__AMD__");
283   Builder.defineMacro("__AMDGPU__");
284 
285   if (isAMDGCN(getTriple()))
286     Builder.defineMacro("__AMDGCN__");
287   else
288     Builder.defineMacro("__R600__");
289 
290   if (GPUKind != llvm::AMDGPU::GK_NONE) {
291     StringRef CanonName = isAMDGCN(getTriple()) ?
292       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
293     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
294   }
295 
296   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
297   // removed in the near future.
298   if (hasFMAF())
299     Builder.defineMacro("__HAS_FMAF__");
300   if (hasFastFMAF())
301     Builder.defineMacro("FP_FAST_FMAF");
302   if (hasLDEXPF())
303     Builder.defineMacro("__HAS_LDEXPF__");
304   if (hasFP64())
305     Builder.defineMacro("__HAS_FP64__");
306   if (hasFastFMA())
307     Builder.defineMacro("FP_FAST_FMA");
308 }
309 
310 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
311   assert(HalfFormat == Aux->HalfFormat);
312   assert(FloatFormat == Aux->FloatFormat);
313   assert(DoubleFormat == Aux->DoubleFormat);
314 
315   // On x86_64 long double is 80-bit extended precision format, which is
316   // not supported by AMDGPU. 128-bit floating point format is also not
317   // supported by AMDGPU. Therefore keep its own format for these two types.
318   auto SaveLongDoubleFormat = LongDoubleFormat;
319   auto SaveFloat128Format = Float128Format;
320   copyAuxTarget(Aux);
321   LongDoubleFormat = SaveLongDoubleFormat;
322   Float128Format = SaveFloat128Format;
323 }
324