1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
34 
35 static const char *const DataLayoutStringSIPrivateIsZero =
36     "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
39 
40 static const char *const DataLayoutStringSIGenericIsZero =
41     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
42     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
43     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
44 
45 static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = {
46     4, // Default
47     1, // opencl_global
48     3, // opencl_local
49     4, // opencl_constant
50     0, // opencl_private
51     4, // opencl_generic
52     1, // cuda_device
53     4, // cuda_constant
54     3  // cuda_shared
55 };
56 
57 static const LangASMap AMDGPUGenIsZeroDefIsGenMap = {
58     0, // Default
59     1, // opencl_global
60     3, // opencl_local
61     4, // opencl_constant
62     5, // opencl_private
63     0, // opencl_generic
64     1, // cuda_device
65     4, // cuda_constant
66     3  // cuda_shared
67 };
68 
69 static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = {
70     0, // Default
71     1, // opencl_global
72     3, // opencl_local
73     4, // opencl_constant
74     0, // opencl_private
75     4, // opencl_generic
76     1, // cuda_device
77     4, // cuda_constant
78     3  // cuda_shared
79 };
80 
81 static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = {
82     5, // Default
83     1, // opencl_global
84     3, // opencl_local
85     4, // opencl_constant
86     5, // opencl_private
87     0, // opencl_generic
88     1, // cuda_device
89     4, // cuda_constant
90     3  // cuda_shared
91 };
92 } // namespace targets
93 } // namespace clang
94 
95 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
96 #define BUILTIN(ID, TYPE, ATTRS)                                               \
97   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
98 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
99   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
100 #include "clang/Basic/BuiltinsAMDGPU.def"
101 };
102 
103 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
104   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
105   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
106   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
107   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
108   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
109   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
110   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
111   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
112   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
113   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
114   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
115   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
116   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
117   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
118   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
119   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
120   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
121   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
122   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
123   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
124   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
125   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
126   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
127   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
128   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
129   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
130   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
131   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
132   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
133   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
134   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
135   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
136   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
137   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
138   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
139   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
140   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
141   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
142   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
143   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
144   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
145   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
146   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
147   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
148   "flat_scratch_lo", "flat_scratch_hi"
149 };
150 
151 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
152   return llvm::makeArrayRef(GCCRegNames);
153 }
154 
155 bool AMDGPUTargetInfo::initFeatureMap(
156     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
157     const std::vector<std::string> &FeatureVec) const {
158 
159   // XXX - What does the member GPU mean if device name string passed here?
160   if (getTriple().getArch() == llvm::Triple::amdgcn) {
161     if (CPU.empty())
162       CPU = "tahiti";
163 
164     switch (parseAMDGCNName(CPU)) {
165     case GK_GFX6:
166     case GK_GFX7:
167       break;
168 
169     case GK_GFX9:
170       Features["gfx9-insts"] = true;
171       LLVM_FALLTHROUGH;
172     case GK_GFX8:
173       Features["s-memrealtime"] = true;
174       Features["16-bit-insts"] = true;
175       Features["dpp"] = true;
176       break;
177 
178     case GK_NONE:
179       return false;
180     default:
181       llvm_unreachable("unhandled subtarget");
182     }
183   } else {
184     if (CPU.empty())
185       CPU = "r600";
186 
187     switch (parseR600Name(CPU)) {
188     case GK_R600:
189     case GK_R700:
190     case GK_EVERGREEN:
191     case GK_NORTHERN_ISLANDS:
192       break;
193     case GK_R600_DOUBLE_OPS:
194     case GK_R700_DOUBLE_OPS:
195     case GK_EVERGREEN_DOUBLE_OPS:
196     case GK_CAYMAN:
197       // TODO: Add fp64 when implemented.
198       break;
199     case GK_NONE:
200       return false;
201     default:
202       llvm_unreachable("unhandled subtarget");
203     }
204   }
205 
206   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
207 }
208 
209 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
210                                            TargetOptions &TargetOpts) const {
211   bool hasFP32Denormals = false;
212   bool hasFP64Denormals = false;
213   for (auto &I : TargetOpts.FeaturesAsWritten) {
214     if (I == "+fp32-denormals" || I == "-fp32-denormals")
215       hasFP32Denormals = true;
216     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
217       hasFP64Denormals = true;
218   }
219   if (!hasFP32Denormals)
220     TargetOpts.Features.push_back(
221         (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
222                    ? '+'
223                    : '-') +
224          Twine("fp32-denormals"))
225             .str());
226   // Always do not flush fp64 or fp16 denorms.
227   if (!hasFP64Denormals && hasFP64)
228     TargetOpts.Features.push_back("+fp64-fp16-denormals");
229 }
230 
231 
232 constexpr AMDGPUTargetInfo::NameGPUKind AMDGPUTargetInfo::R600Names[];
233 constexpr AMDGPUTargetInfo::NameGPUKind AMDGPUTargetInfo::AMDGCNNames[];
234 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
235   const auto *Result = llvm::find_if(
236       R600Names, [Name](const NameGPUKind &Kind) { return Kind.Name == Name; });
237 
238   if (Result == std::end(R600Names))
239     return GK_NONE;
240   return Result->Kind;
241 }
242 
243 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
244   const auto *Result =
245       llvm::find_if(AMDGCNNames, [Name](const NameGPUKind &Kind) {
246         return Kind.Name == Name;
247       });
248 
249   if (Result == std::end(AMDGCNNames))
250     return GK_NONE;
251   return Result->Kind;
252 }
253 
254 void AMDGPUTargetInfo::fillValidCPUList(
255     SmallVectorImpl<StringRef> &Values) const {
256   if (getTriple().getArch() == llvm::Triple::amdgcn)
257     llvm::for_each(AMDGCNNames, [&Values](const NameGPUKind &Kind) {
258                    Values.emplace_back(Kind.Name);});
259   else
260     llvm::for_each(R600Names, [&Values](const NameGPUKind &Kind) {
261                    Values.emplace_back(Kind.Name);});
262 }
263 
264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265   if (isGenericZero(getTriple())) {
266     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
267                                     : &AMDGPUGenIsZeroDefIsGenMap;
268   } else {
269     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
270                                     : &AMDGPUPrivIsZeroDefIsGenMap;
271   }
272 }
273 
274 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
275                                    const TargetOptions &Opts)
276     : TargetInfo(Triple),
277       GPU(isAMDGCN(Triple) ? GK_GFX6 : parseR600Name(Opts.CPU)),
278       hasFP64(false), hasFMAF(false), hasLDEXPF(false),
279       AS(isGenericZero(Triple)) {
280   if (getTriple().getArch() == llvm::Triple::amdgcn) {
281     hasFP64 = true;
282     hasFMAF = true;
283     hasLDEXPF = true;
284   }
285   if (getTriple().getArch() == llvm::Triple::r600) {
286     if (GPU == GK_EVERGREEN_DOUBLE_OPS || GPU == GK_CAYMAN) {
287       hasFMAF = true;
288     }
289   }
290   auto IsGenericZero = isGenericZero(Triple);
291   resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
292                       ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
293                                        : DataLayoutStringSIPrivateIsZero)
294                       : DataLayoutStringR600);
295   assert(DataLayout->getAllocaAddrSpace() == AS.Private);
296 
297   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
298                      Triple.getEnvironment() == llvm::Triple::OpenCL ||
299                      Triple.getEnvironmentName() == "amdgizcl" ||
300                      !isAMDGCN(Triple));
301   UseAddrSpaceMapMangling = true;
302 
303   // Set pointer width and alignment for target address space 0.
304   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
305   if (getMaxPointerWidth() == 64) {
306     LongWidth = LongAlign = 64;
307     SizeType = UnsignedLong;
308     PtrDiffType = SignedLong;
309     IntPtrType = SignedLong;
310   }
311 
312   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
313 }
314 
315 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
316   TargetInfo::adjust(Opts);
317   setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
318 }
319 
320 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
321   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
322                                              Builtin::FirstTSBuiltin);
323 }
324 
325 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
326                                         MacroBuilder &Builder) const {
327   if (getTriple().getArch() == llvm::Triple::amdgcn)
328     Builder.defineMacro("__AMDGCN__");
329   else
330     Builder.defineMacro("__R600__");
331 
332   if (hasFMAF)
333     Builder.defineMacro("__HAS_FMAF__");
334   if (hasLDEXPF)
335     Builder.defineMacro("__HAS_LDEXPF__");
336   if (hasFP64)
337     Builder.defineMacro("__HAS_FP64__");
338 }
339