1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
// LLVM data layout description used for R600 targets. The literal is split by
// component: endianness and default pointer, i64 alignment, vector
// alignments, then native integer widths, stack alignment and the alloca
// address space.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
33 
// LLVM data layout description used for AMDGCN targets. The literal is split
// by component: endianness and default pointer, per-address-space pointers
// (p1..p6), i64 alignment, vector alignments, native integer widths / stack
// alignment / alloca address space, and finally the non-integral address
// space list.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5"
    "-ni:7";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS)                                               \
68   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
70   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119   "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     if (CPU.empty())
135       CPU = "gfx600";
136 
137     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138     case GK_GFX1012:
139     case GK_GFX1011:
140       Features["dot1-insts"] = true;
141       Features["dot2-insts"] = true;
142       Features["dot5-insts"] = true;
143       Features["dot6-insts"] = true;
144       LLVM_FALLTHROUGH;
145     case GK_GFX1010:
146       Features["dl-insts"] = true;
147       Features["16-bit-insts"] = true;
148       Features["dpp"] = true;
149       Features["gfx9-insts"] = true;
150       Features["gfx10-insts"] = true;
151       Features["s-memrealtime"] = true;
152       break;
153     case GK_GFX906:
154       Features["dl-insts"] = true;
155       Features["dot1-insts"] = true;
156       Features["dot2-insts"] = true;
157       LLVM_FALLTHROUGH;
158     case GK_GFX909:
159     case GK_GFX904:
160     case GK_GFX902:
161     case GK_GFX900:
162       Features["gfx9-insts"] = true;
163       LLVM_FALLTHROUGH;
164     case GK_GFX810:
165     case GK_GFX803:
166     case GK_GFX802:
167     case GK_GFX801:
168       Features["gfx8-insts"] = true;
169       Features["16-bit-insts"] = true;
170       Features["dpp"] = true;
171       Features["s-memrealtime"] = true;
172       LLVM_FALLTHROUGH;
173     case GK_GFX704:
174     case GK_GFX703:
175     case GK_GFX702:
176     case GK_GFX701:
177     case GK_GFX700:
178       Features["ci-insts"] = true;
179       LLVM_FALLTHROUGH;
180     case GK_GFX601:
181     case GK_GFX600:
182       break;
183     case GK_NONE:
184       return false;
185     default:
186       llvm_unreachable("Unhandled GPU!");
187     }
188   } else {
189     if (CPU.empty())
190       CPU = "r600";
191 
192     switch (llvm::AMDGPU::parseArchR600(CPU)) {
193     case GK_CAYMAN:
194     case GK_CYPRESS:
195     case GK_RV770:
196     case GK_RV670:
197       // TODO: Add fp64 when implemented.
198       break;
199     case GK_TURKS:
200     case GK_CAICOS:
201     case GK_BARTS:
202     case GK_SUMO:
203     case GK_REDWOOD:
204     case GK_JUNIPER:
205     case GK_CEDAR:
206     case GK_RV730:
207     case GK_RV710:
208     case GK_RS880:
209     case GK_R630:
210     case GK_R600:
211       break;
212     default:
213       llvm_unreachable("Unhandled GPU!");
214     }
215   }
216 
217   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
218 }
219 
220 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
221                                            TargetOptions &TargetOpts) const {
222   bool hasFP32Denormals = false;
223   bool hasFP64Denormals = false;
224 
225   for (auto &I : TargetOpts.FeaturesAsWritten) {
226     if (I == "+fp32-denormals" || I == "-fp32-denormals")
227       hasFP32Denormals = true;
228     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
229       hasFP64Denormals = true;
230   }
231   if (!hasFP32Denormals)
232     TargetOpts.Features.push_back(
233       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
234              ? '+' : '-') + Twine("fp32-denormals"))
235             .str());
236   // Always do not flush fp64 or fp16 denorms.
237   if (!hasFP64Denormals && hasFP64())
238     TargetOpts.Features.push_back("+fp64-fp16-denormals");
239 }
240 
241 void AMDGPUTargetInfo::fillValidCPUList(
242     SmallVectorImpl<StringRef> &Values) const {
243   if (isAMDGCN(getTriple()))
244     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
245   else
246     llvm::AMDGPU::fillValidArchListR600(Values);
247 }
248 
249 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
250   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
251 }
252 
253 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
254                                    const TargetOptions &Opts)
255     : TargetInfo(Triple),
256       GPUKind(isAMDGCN(Triple) ?
257               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
258               llvm::AMDGPU::parseArchR600(Opts.CPU)),
259       GPUFeatures(isAMDGCN(Triple) ?
260                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
261                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
262   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
263                                         : DataLayoutStringR600);
264   assert(DataLayout->getAllocaAddrSpace() == Private);
265 
266   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
267                      !isAMDGCN(Triple));
268   UseAddrSpaceMapMangling = true;
269 
270   HasLegalHalfType = true;
271   HasFloat16 = true;
272 
273   // Set pointer width and alignment for target address space 0.
274   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
275   if (getMaxPointerWidth() == 64) {
276     LongWidth = LongAlign = 64;
277     SizeType = UnsignedLong;
278     PtrDiffType = SignedLong;
279     IntPtrType = SignedLong;
280   }
281 
282   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
283 }
284 
285 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
286   TargetInfo::adjust(Opts);
287   // ToDo: There are still a few places using default address space as private
288   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
289   // can be removed from the following line.
290   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
291                      !isAMDGCN(getTriple()));
292 }
293 
294 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
295   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
296                                              Builtin::FirstTSBuiltin);
297 }
298 
299 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
300                                         MacroBuilder &Builder) const {
301   Builder.defineMacro("__AMD__");
302   Builder.defineMacro("__AMDGPU__");
303 
304   if (isAMDGCN(getTriple()))
305     Builder.defineMacro("__AMDGCN__");
306   else
307     Builder.defineMacro("__R600__");
308 
309   if (GPUKind != llvm::AMDGPU::GK_NONE) {
310     StringRef CanonName = isAMDGCN(getTriple()) ?
311       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
312     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
313   }
314 
315   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
316   // removed in the near future.
317   if (hasFMAF())
318     Builder.defineMacro("__HAS_FMAF__");
319   if (hasFastFMAF())
320     Builder.defineMacro("FP_FAST_FMAF");
321   if (hasLDEXPF())
322     Builder.defineMacro("__HAS_LDEXPF__");
323   if (hasFP64())
324     Builder.defineMacro("__HAS_FP64__");
325   if (hasFastFMA())
326     Builder.defineMacro("FP_FAST_FMA");
327 }
328 
329 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
330   assert(HalfFormat == Aux->HalfFormat);
331   assert(FloatFormat == Aux->FloatFormat);
332   assert(DoubleFormat == Aux->DoubleFormat);
333 
334   // On x86_64 long double is 80-bit extended precision format, which is
335   // not supported by AMDGPU. 128-bit floating point format is also not
336   // supported by AMDGPU. Therefore keep its own format for these two types.
337   auto SaveLongDoubleFormat = LongDoubleFormat;
338   auto SaveFloat128Format = Float128Format;
339   copyAuxTarget(Aux);
340   LongDoubleFormat = SaveLongDoubleFormat;
341   Float128Format = SaveFloat128Format;
342 }
343