1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
// NOTE: getPointerWidthV() must be kept in sync with these description
// strings; update it whenever either string below is edited.
//
// Each string is split so that every literal holds one group of
// specifications (per the LLVM data layout syntax): pointer specs, the i64
// alignment, the vector alignments, and finally the native integer widths,
// stack alignment and alloca address space.

static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";

// Same layout as R600 except for the pointer specs (a 64-bit default pointer
// plus explicit entries for address spaces 1 through 6) and the trailing
// "-ni:7" component.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5"
    "-ni:7";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
// Table of AMDGPU builtins. Every BUILTIN / TARGET_BUILTIN entry found in
// BuiltinsAMDGPU.def expands to one Builtin::Info initializer below.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// BUILTIN: the stringized ID is the first field; the final field is nullptr.
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// TARGET_BUILTIN: same as BUILTIN, but FEATURE is passed as the final field
// instead of nullptr.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
73 
// Register names exposed through getGCCRegNames(). The table lists, in order:
//   - v0 .. v255
//   - s0 .. s127
//   - the named registers exec, vcc, scc, m0 and flat_scratch, followed by
//     the _lo/_hi halves of exec, vcc and flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     if (CPU.empty())
135       CPU = "gfx600";
136 
137     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138     case GK_GFX1010:
139       Features["dl-insts"] = true;
140       Features["16-bit-insts"] = true;
141       Features["dpp"] = true;
142       Features["gfx9-insts"] = true;
143       Features["gfx10-insts"] = true;
144       Features["s-memrealtime"] = true;
145       break;
146     case GK_GFX906:
147       Features["dl-insts"] = true;
148       Features["dot1-insts"] = true;
149       Features["dot2-insts"] = true;
150       LLVM_FALLTHROUGH;
151     case GK_GFX909:
152     case GK_GFX904:
153     case GK_GFX902:
154     case GK_GFX900:
155       Features["gfx9-insts"] = true;
156       LLVM_FALLTHROUGH;
157     case GK_GFX810:
158     case GK_GFX803:
159     case GK_GFX802:
160     case GK_GFX801:
161       Features["gfx8-insts"] = true;
162       Features["16-bit-insts"] = true;
163       Features["dpp"] = true;
164       Features["s-memrealtime"] = true;
165       LLVM_FALLTHROUGH;
166     case GK_GFX704:
167     case GK_GFX703:
168     case GK_GFX702:
169     case GK_GFX701:
170     case GK_GFX700:
171       Features["ci-insts"] = true;
172       LLVM_FALLTHROUGH;
173     case GK_GFX601:
174     case GK_GFX600:
175       break;
176     case GK_NONE:
177       return false;
178     default:
179       llvm_unreachable("Unhandled GPU!");
180     }
181   } else {
182     if (CPU.empty())
183       CPU = "r600";
184 
185     switch (llvm::AMDGPU::parseArchR600(CPU)) {
186     case GK_CAYMAN:
187     case GK_CYPRESS:
188     case GK_RV770:
189     case GK_RV670:
190       // TODO: Add fp64 when implemented.
191       break;
192     case GK_TURKS:
193     case GK_CAICOS:
194     case GK_BARTS:
195     case GK_SUMO:
196     case GK_REDWOOD:
197     case GK_JUNIPER:
198     case GK_CEDAR:
199     case GK_RV730:
200     case GK_RV710:
201     case GK_RS880:
202     case GK_R630:
203     case GK_R600:
204       break;
205     default:
206       llvm_unreachable("Unhandled GPU!");
207     }
208   }
209 
210   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
211 }
212 
213 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
214                                            TargetOptions &TargetOpts) const {
215   bool hasFP32Denormals = false;
216   bool hasFP64Denormals = false;
217 
218   for (auto &I : TargetOpts.FeaturesAsWritten) {
219     if (I == "+fp32-denormals" || I == "-fp32-denormals")
220       hasFP32Denormals = true;
221     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
222       hasFP64Denormals = true;
223   }
224   if (!hasFP32Denormals)
225     TargetOpts.Features.push_back(
226       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
227              ? '+' : '-') + Twine("fp32-denormals"))
228             .str());
229   // Always do not flush fp64 or fp16 denorms.
230   if (!hasFP64Denormals && hasFP64())
231     TargetOpts.Features.push_back("+fp64-fp16-denormals");
232 }
233 
234 void AMDGPUTargetInfo::fillValidCPUList(
235     SmallVectorImpl<StringRef> &Values) const {
236   if (isAMDGCN(getTriple()))
237     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
238   else
239     llvm::AMDGPU::fillValidArchListR600(Values);
240 }
241 
242 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
243   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
244 }
245 
246 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
247                                    const TargetOptions &Opts)
248     : TargetInfo(Triple),
249       GPUKind(isAMDGCN(Triple) ?
250               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
251               llvm::AMDGPU::parseArchR600(Opts.CPU)),
252       GPUFeatures(isAMDGCN(Triple) ?
253                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
254                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
255   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
256                                         : DataLayoutStringR600);
257   assert(DataLayout->getAllocaAddrSpace() == Private);
258 
259   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
260                      !isAMDGCN(Triple));
261   UseAddrSpaceMapMangling = true;
262 
263   HasLegalHalfType = true;
264   HasFloat16 = true;
265 
266   // Set pointer width and alignment for target address space 0.
267   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
268   if (getMaxPointerWidth() == 64) {
269     LongWidth = LongAlign = 64;
270     SizeType = UnsignedLong;
271     PtrDiffType = SignedLong;
272     IntPtrType = SignedLong;
273   }
274 
275   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
276 }
277 
278 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
279   TargetInfo::adjust(Opts);
280   // ToDo: There are still a few places using default address space as private
281   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
282   // can be removed from the following line.
283   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
284                      !isAMDGCN(getTriple()));
285 }
286 
287 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
288   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
289                                              Builtin::FirstTSBuiltin);
290 }
291 
292 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
293                                         MacroBuilder &Builder) const {
294   Builder.defineMacro("__AMD__");
295   Builder.defineMacro("__AMDGPU__");
296 
297   if (isAMDGCN(getTriple()))
298     Builder.defineMacro("__AMDGCN__");
299   else
300     Builder.defineMacro("__R600__");
301 
302   if (GPUKind != llvm::AMDGPU::GK_NONE) {
303     StringRef CanonName = isAMDGCN(getTriple()) ?
304       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
305     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
306   }
307 
308   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
309   // removed in the near future.
310   if (hasFMAF())
311     Builder.defineMacro("__HAS_FMAF__");
312   if (hasFastFMAF())
313     Builder.defineMacro("FP_FAST_FMAF");
314   if (hasLDEXPF())
315     Builder.defineMacro("__HAS_LDEXPF__");
316   if (hasFP64())
317     Builder.defineMacro("__HAS_FP64__");
318   if (hasFastFMA())
319     Builder.defineMacro("FP_FAST_FMA");
320 }
321 
322 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
323   assert(HalfFormat == Aux->HalfFormat);
324   assert(FloatFormat == Aux->FloatFormat);
325   assert(DoubleFormat == Aux->DoubleFormat);
326 
327   // On x86_64 long double is 80-bit extended precision format, which is
328   // not supported by AMDGPU. 128-bit floating point format is also not
329   // supported by AMDGPU. Therefore keep its own format for these two types.
330   auto SaveLongDoubleFormat = LongDoubleFormat;
331   auto SaveFloat128Format = Float128Format;
332   copyAuxTarget(Aux);
333   LongDoubleFormat = SaveLongDoubleFormat;
334   Float128Format = SaveFloat128Format;
335 }
336