1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
// Data-layout description used for non-amdgcn (r600) triples. The literal is
// split by kind of spec (pointer, integer, vector, then the trailing
// native-width / stack / alloca specs); the concatenated text is what the
// constructor hands to resetDataLayout().
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
33 
// Data-layout description used for amdgcn triples. Compared with the r600
// string it carries per-address-space pointer specs (p1..p6) and a trailing
// "-ni:7" spec. The literal is split by kind of spec; the concatenated text
// is what the constructor hands to resetDataLayout().
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5"
    "-ni:7";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS)                                               \
68   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
70   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119   "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
135     case GK_GFX1012:
136     case GK_GFX1011:
137       Features["dot1-insts"] = true;
138       Features["dot2-insts"] = true;
139       Features["dot5-insts"] = true;
140       Features["dot6-insts"] = true;
141       LLVM_FALLTHROUGH;
142     case GK_GFX1010:
143       Features["dl-insts"] = true;
144       Features["ci-insts"] = true;
145       Features["flat-address-space"] = true;
146       Features["16-bit-insts"] = true;
147       Features["dpp"] = true;
148       Features["gfx8-insts"] = true;
149       Features["gfx9-insts"] = true;
150       Features["gfx10-insts"] = true;
151       Features["s-memrealtime"] = true;
152       break;
153     case GK_GFX908:
154       Features["dot3-insts"] = true;
155       Features["dot4-insts"] = true;
156       Features["dot5-insts"] = true;
157       Features["dot6-insts"] = true;
158       LLVM_FALLTHROUGH;
159     case GK_GFX906:
160       Features["dl-insts"] = true;
161       Features["dot1-insts"] = true;
162       Features["dot2-insts"] = true;
163       LLVM_FALLTHROUGH;
164     case GK_GFX909:
165     case GK_GFX904:
166     case GK_GFX902:
167     case GK_GFX900:
168       Features["gfx9-insts"] = true;
169       LLVM_FALLTHROUGH;
170     case GK_GFX810:
171     case GK_GFX803:
172     case GK_GFX802:
173     case GK_GFX801:
174       Features["gfx8-insts"] = true;
175       Features["16-bit-insts"] = true;
176       Features["dpp"] = true;
177       Features["s-memrealtime"] = true;
178       LLVM_FALLTHROUGH;
179     case GK_GFX704:
180     case GK_GFX703:
181     case GK_GFX702:
182     case GK_GFX701:
183     case GK_GFX700:
184       Features["ci-insts"] = true;
185       Features["flat-address-space"] = true;
186       LLVM_FALLTHROUGH;
187     case GK_GFX601:
188     case GK_GFX600:
189       break;
190     case GK_NONE:
191       break;
192     default:
193       llvm_unreachable("Unhandled GPU!");
194     }
195   } else {
196     if (CPU.empty())
197       CPU = "r600";
198 
199     switch (llvm::AMDGPU::parseArchR600(CPU)) {
200     case GK_CAYMAN:
201     case GK_CYPRESS:
202     case GK_RV770:
203     case GK_RV670:
204       // TODO: Add fp64 when implemented.
205       break;
206     case GK_TURKS:
207     case GK_CAICOS:
208     case GK_BARTS:
209     case GK_SUMO:
210     case GK_REDWOOD:
211     case GK_JUNIPER:
212     case GK_CEDAR:
213     case GK_RV730:
214     case GK_RV710:
215     case GK_RS880:
216     case GK_R630:
217     case GK_R600:
218       break;
219     default:
220       llvm_unreachable("Unhandled GPU!");
221     }
222   }
223 
224   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
225 }
226 
227 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
228                                            TargetOptions &TargetOpts) const {
229   bool hasFP32Denormals = false;
230   bool hasFP64Denormals = false;
231 
232   for (auto &I : TargetOpts.FeaturesAsWritten) {
233     if (I == "+fp32-denormals" || I == "-fp32-denormals")
234       hasFP32Denormals = true;
235     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
236       hasFP64Denormals = true;
237   }
238   if (!hasFP32Denormals)
239     TargetOpts.Features.push_back(
240       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
241              ? '+' : '-') + Twine("fp32-denormals"))
242             .str());
243   // Always do not flush fp64 or fp16 denorms.
244   if (!hasFP64Denormals && hasFP64())
245     TargetOpts.Features.push_back("+fp64-fp16-denormals");
246 }
247 
248 void AMDGPUTargetInfo::fillValidCPUList(
249     SmallVectorImpl<StringRef> &Values) const {
250   if (isAMDGCN(getTriple()))
251     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
252   else
253     llvm::AMDGPU::fillValidArchListR600(Values);
254 }
255 
256 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
257   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
258 }
259 
260 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
261                                    const TargetOptions &Opts)
262     : TargetInfo(Triple),
263       GPUKind(isAMDGCN(Triple) ?
264               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
265               llvm::AMDGPU::parseArchR600(Opts.CPU)),
266       GPUFeatures(isAMDGCN(Triple) ?
267                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
268                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
269   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
270                                         : DataLayoutStringR600);
271   assert(DataLayout->getAllocaAddrSpace() == Private);
272 
273   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
274                      !isAMDGCN(Triple));
275   UseAddrSpaceMapMangling = true;
276 
277   HasLegalHalfType = true;
278   HasFloat16 = true;
279 
280   // Set pointer width and alignment for target address space 0.
281   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
282   if (getMaxPointerWidth() == 64) {
283     LongWidth = LongAlign = 64;
284     SizeType = UnsignedLong;
285     PtrDiffType = SignedLong;
286     IntPtrType = SignedLong;
287   }
288 
289   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
290 }
291 
292 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
293   TargetInfo::adjust(Opts);
294   // ToDo: There are still a few places using default address space as private
295   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
296   // can be removed from the following line.
297   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
298                      !isAMDGCN(getTriple()));
299 }
300 
301 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
302   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
303                                              Builtin::FirstTSBuiltin);
304 }
305 
306 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
307                                         MacroBuilder &Builder) const {
308   Builder.defineMacro("__AMD__");
309   Builder.defineMacro("__AMDGPU__");
310 
311   if (isAMDGCN(getTriple()))
312     Builder.defineMacro("__AMDGCN__");
313   else
314     Builder.defineMacro("__R600__");
315 
316   if (GPUKind != llvm::AMDGPU::GK_NONE) {
317     StringRef CanonName = isAMDGCN(getTriple()) ?
318       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
319     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
320   }
321 
322   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
323   // removed in the near future.
324   if (hasFMAF())
325     Builder.defineMacro("__HAS_FMAF__");
326   if (hasFastFMAF())
327     Builder.defineMacro("FP_FAST_FMAF");
328   if (hasLDEXPF())
329     Builder.defineMacro("__HAS_LDEXPF__");
330   if (hasFP64())
331     Builder.defineMacro("__HAS_FP64__");
332   if (hasFastFMA())
333     Builder.defineMacro("FP_FAST_FMA");
334 }
335 
336 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
337   assert(HalfFormat == Aux->HalfFormat);
338   assert(FloatFormat == Aux->FloatFormat);
339   assert(DoubleFormat == Aux->DoubleFormat);
340 
341   // On x86_64 long double is 80-bit extended precision format, which is
342   // not supported by AMDGPU. 128-bit floating point format is also not
343   // supported by AMDGPU. Therefore keep its own format for these two types.
344   auto SaveLongDoubleFormat = LongDoubleFormat;
345   auto SaveFloat128Format = Float128Format;
346   copyAuxTarget(Aux);
347   LongDoubleFormat = SaveLongDoubleFormat;
348   Float128Format = SaveFloat128Format;
349 }
350