1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
30 static const char *const DataLayoutStringR600 =
31     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
33 
// Data layout description installed by the constructor for amdgcn triples.
// The literal is split by component group purely for readability; the
// concatenated text is what gets passed to resetDataLayout().
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-ni:7";
39 
// Address space map installed by setAddressSpaceMap() when DefaultIsPrivate is
// false: the language-level default address space maps to Generic. Entries
// are positional; the trailing comment on each row names the language address
// space that slot stands for. Differs from AMDGPUDefIsPrivMap only in the
// first (Default) entry.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
51 
// Address space map installed by setAddressSpaceMap() when DefaultIsPrivate is
// true: the language-level default address space maps to Private. Entries are
// positional; the trailing comment on each row names the language address
// space that slot stands for. Differs from AMDGPUDefIsGenMap only in the
// first (Default) entry.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
63 } // namespace targets
64 } // namespace clang
65 
// Descriptor table for the AMDGPU builtins, produced by expanding every entry
// of BuiltinsAMDGPU.def through the two macros below. Each row stringizes the
// builtin ID and records its TYPE and ATTRS strings, a null fourth field and
// ALL_LANGUAGES. TARGET_BUILTIN rows additionally store their FEATURE argument
// in the last field; plain BUILTIN rows leave it null.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
73 
// Register name table handed out by getGCCRegNames(). Layout: v0-v255, then
// s0-s127, followed by the special names exec, vcc, scc, m0, flat_scratch and
// the _lo/_hi halves of exec, vcc and flat_scratch.
// NOTE(review): presumably these are the names accepted in inline-asm
// constraints/clobbers -- confirm against the TargetInfo base class.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     if (CPU.empty())
135       CPU = "gfx600";
136 
137     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138     case GK_GFX906:
139       Features["dl-insts"] = true;
140       Features["dot1-insts"] = true;
141       Features["dot2-insts"] = true;
142       LLVM_FALLTHROUGH;
143     case GK_GFX909:
144     case GK_GFX904:
145     case GK_GFX902:
146     case GK_GFX900:
147       Features["gfx9-insts"] = true;
148       LLVM_FALLTHROUGH;
149     case GK_GFX810:
150     case GK_GFX803:
151     case GK_GFX802:
152     case GK_GFX801:
153       Features["gfx8-insts"] = true;
154       Features["16-bit-insts"] = true;
155       Features["dpp"] = true;
156       Features["s-memrealtime"] = true;
157       LLVM_FALLTHROUGH;
158     case GK_GFX704:
159     case GK_GFX703:
160     case GK_GFX702:
161     case GK_GFX701:
162     case GK_GFX700:
163       Features["ci-insts"] = true;
164       LLVM_FALLTHROUGH;
165     case GK_GFX601:
166     case GK_GFX600:
167       break;
168     case GK_NONE:
169       return false;
170     default:
171       llvm_unreachable("Unhandled GPU!");
172     }
173   } else {
174     if (CPU.empty())
175       CPU = "r600";
176 
177     switch (llvm::AMDGPU::parseArchR600(CPU)) {
178     case GK_CAYMAN:
179     case GK_CYPRESS:
180     case GK_RV770:
181     case GK_RV670:
182       // TODO: Add fp64 when implemented.
183       break;
184     case GK_TURKS:
185     case GK_CAICOS:
186     case GK_BARTS:
187     case GK_SUMO:
188     case GK_REDWOOD:
189     case GK_JUNIPER:
190     case GK_CEDAR:
191     case GK_RV730:
192     case GK_RV710:
193     case GK_RS880:
194     case GK_R630:
195     case GK_R600:
196       break;
197     default:
198       llvm_unreachable("Unhandled GPU!");
199     }
200   }
201 
202   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
203 }
204 
205 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
206                                            TargetOptions &TargetOpts) const {
207   bool hasFP32Denormals = false;
208   bool hasFP64Denormals = false;
209 
210   for (auto &I : TargetOpts.FeaturesAsWritten) {
211     if (I == "+fp32-denormals" || I == "-fp32-denormals")
212       hasFP32Denormals = true;
213     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
214       hasFP64Denormals = true;
215   }
216   if (!hasFP32Denormals)
217     TargetOpts.Features.push_back(
218       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
219              ? '+' : '-') + Twine("fp32-denormals"))
220             .str());
221   // Always do not flush fp64 or fp16 denorms.
222   if (!hasFP64Denormals && hasFP64())
223     TargetOpts.Features.push_back("+fp64-fp16-denormals");
224 }
225 
226 void AMDGPUTargetInfo::fillValidCPUList(
227     SmallVectorImpl<StringRef> &Values) const {
228   if (isAMDGCN(getTriple()))
229     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
230   else
231     llvm::AMDGPU::fillValidArchListR600(Values);
232 }
233 
234 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
235   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
236 }
237 
238 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
239                                    const TargetOptions &Opts)
240     : TargetInfo(Triple),
241       GPUKind(isAMDGCN(Triple) ?
242               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
243               llvm::AMDGPU::parseArchR600(Opts.CPU)),
244       GPUFeatures(isAMDGCN(Triple) ?
245                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
246                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
247   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
248                                         : DataLayoutStringR600);
249   assert(DataLayout->getAllocaAddrSpace() == Private);
250 
251   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
252                      !isAMDGCN(Triple));
253   UseAddrSpaceMapMangling = true;
254 
255   HasLegalHalfType = true;
256   HasFloat16 = true;
257 
258   // Set pointer width and alignment for target address space 0.
259   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
260   if (getMaxPointerWidth() == 64) {
261     LongWidth = LongAlign = 64;
262     SizeType = UnsignedLong;
263     PtrDiffType = SignedLong;
264     IntPtrType = SignedLong;
265   }
266 
267   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
268 }
269 
270 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
271   TargetInfo::adjust(Opts);
272   // ToDo: There are still a few places using default address space as private
273   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
274   // can be removed from the following line.
275   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
276                      !isAMDGCN(getTriple()));
277 }
278 
279 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
280   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
281                                              Builtin::FirstTSBuiltin);
282 }
283 
284 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
285                                         MacroBuilder &Builder) const {
286   Builder.defineMacro("__AMD__");
287   Builder.defineMacro("__AMDGPU__");
288 
289   if (isAMDGCN(getTriple()))
290     Builder.defineMacro("__AMDGCN__");
291   else
292     Builder.defineMacro("__R600__");
293 
294   if (GPUKind != llvm::AMDGPU::GK_NONE) {
295     StringRef CanonName = isAMDGCN(getTriple()) ?
296       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
297     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
298   }
299 
300   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
301   // removed in the near future.
302   if (hasFMAF())
303     Builder.defineMacro("__HAS_FMAF__");
304   if (hasFastFMAF())
305     Builder.defineMacro("FP_FAST_FMAF");
306   if (hasLDEXPF())
307     Builder.defineMacro("__HAS_LDEXPF__");
308   if (hasFP64())
309     Builder.defineMacro("__HAS_FP64__");
310   if (hasFastFMA())
311     Builder.defineMacro("FP_FAST_FMA");
312 }
313 
314 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
315   assert(HalfFormat == Aux->HalfFormat);
316   assert(FloatFormat == Aux->FloatFormat);
317   assert(DoubleFormat == Aux->DoubleFormat);
318 
319   // On x86_64 long double is 80-bit extended precision format, which is
320   // not supported by AMDGPU. 128-bit floating point format is also not
321   // supported by AMDGPU. Therefore keep its own format for these two types.
322   auto SaveLongDoubleFormat = LongDoubleFormat;
323   auto SaveFloat128Format = Float128Format;
324   copyAuxTarget(Aux);
325   LongDoubleFormat = SaveLongDoubleFormat;
326   Float128Format = SaveFloat128Format;
327 }
328