1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
34 
35 static const char *const DataLayoutStringSIPrivateIsZero =
36     "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
37     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
39 
40 static const char *const DataLayoutStringSIGenericIsZero =
41     "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
42     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
43     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
44 
45 static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = {
46     4, // Default
47     1, // opencl_global
48     3, // opencl_local
49     2, // opencl_constant
50     4, // opencl_generic
51     1, // cuda_device
52     2, // cuda_constant
53     3  // cuda_shared
54 };
55 
56 static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = {
57     0, // Default
58     1, // opencl_global
59     3, // opencl_local
60     2, // opencl_constant
61     0, // opencl_generic
62     1, // cuda_device
63     2, // cuda_constant
64     3  // cuda_shared
65 };
66 
67 static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = {
68     0, // Default
69     1, // opencl_global
70     3, // opencl_local
71     2, // opencl_constant
72     4, // opencl_generic
73     1, // cuda_device
74     2, // cuda_constant
75     3  // cuda_shared
76 };
77 
78 static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = {
79     5, // Default
80     1, // opencl_global
81     3, // opencl_local
82     2, // opencl_constant
83     0, // opencl_generic
84     1, // cuda_device
85     2, // cuda_constant
86     3  // cuda_shared
87 };
88 } // namespace targets
89 } // namespace clang
90 
91 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
92 #define BUILTIN(ID, TYPE, ATTRS)                                               \
93   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
94 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
95   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
96 #include "clang/Basic/BuiltinsAMDGPU.def"
97 };
98 
99 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
100   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
101   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
102   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
103   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
104   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
105   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
106   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
107   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
108   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
109   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
110   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
111   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
112   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
113   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
114   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
115   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
116   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
117   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
118   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
119   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
120   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
121   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
122   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
123   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
124   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
125   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
126   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
127   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
128   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
129   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
130   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
131   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
132   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
133   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
134   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
135   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
136   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
137   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
138   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
139   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
140   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
141   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
142   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
143   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
144   "flat_scratch_lo", "flat_scratch_hi"
145 };
146 
147 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
148   return llvm::makeArrayRef(GCCRegNames);
149 }
150 
151 bool AMDGPUTargetInfo::initFeatureMap(
152     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
153     const std::vector<std::string> &FeatureVec) const {
154 
155   // XXX - What does the member GPU mean if device name string passed here?
156   if (getTriple().getArch() == llvm::Triple::amdgcn) {
157     if (CPU.empty())
158       CPU = "tahiti";
159 
160     switch (parseAMDGCNName(CPU)) {
161     case GK_GFX6:
162     case GK_GFX7:
163       break;
164 
165     case GK_GFX9:
166       Features["gfx9-insts"] = true;
167       LLVM_FALLTHROUGH;
168     case GK_GFX8:
169       Features["s-memrealtime"] = true;
170       Features["16-bit-insts"] = true;
171       Features["dpp"] = true;
172       break;
173 
174     case GK_NONE:
175       return false;
176     default:
177       llvm_unreachable("unhandled subtarget");
178     }
179   } else {
180     if (CPU.empty())
181       CPU = "r600";
182 
183     switch (parseR600Name(CPU)) {
184     case GK_R600:
185     case GK_R700:
186     case GK_EVERGREEN:
187     case GK_NORTHERN_ISLANDS:
188       break;
189     case GK_R600_DOUBLE_OPS:
190     case GK_R700_DOUBLE_OPS:
191     case GK_EVERGREEN_DOUBLE_OPS:
192     case GK_CAYMAN:
193       Features["fp64"] = true;
194       break;
195     case GK_NONE:
196       return false;
197     default:
198       llvm_unreachable("unhandled subtarget");
199     }
200   }
201 
202   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
203 }
204 
205 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
206                                            TargetOptions &TargetOpts) const {
207   bool hasFP32Denormals = false;
208   bool hasFP64Denormals = false;
209   for (auto &I : TargetOpts.FeaturesAsWritten) {
210     if (I == "+fp32-denormals" || I == "-fp32-denormals")
211       hasFP32Denormals = true;
212     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
213       hasFP64Denormals = true;
214   }
215   if (!hasFP32Denormals)
216     TargetOpts.Features.push_back(
217         (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
218                    ? '+'
219                    : '-') +
220          Twine("fp32-denormals"))
221             .str());
222   // Always do not flush fp64 or fp16 denorms.
223   if (!hasFP64Denormals && hasFP64)
224     TargetOpts.Features.push_back("+fp64-fp16-denormals");
225 }
226 
227 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
228   return llvm::StringSwitch<GPUKind>(Name)
229       .Case("r600", GK_R600)
230       .Case("rv610", GK_R600)
231       .Case("rv620", GK_R600)
232       .Case("rv630", GK_R600)
233       .Case("rv635", GK_R600)
234       .Case("rs780", GK_R600)
235       .Case("rs880", GK_R600)
236       .Case("rv670", GK_R600_DOUBLE_OPS)
237       .Case("rv710", GK_R700)
238       .Case("rv730", GK_R700)
239       .Case("rv740", GK_R700_DOUBLE_OPS)
240       .Case("rv770", GK_R700_DOUBLE_OPS)
241       .Case("palm", GK_EVERGREEN)
242       .Case("cedar", GK_EVERGREEN)
243       .Case("sumo", GK_EVERGREEN)
244       .Case("sumo2", GK_EVERGREEN)
245       .Case("redwood", GK_EVERGREEN)
246       .Case("juniper", GK_EVERGREEN)
247       .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS)
248       .Case("cypress", GK_EVERGREEN_DOUBLE_OPS)
249       .Case("barts", GK_NORTHERN_ISLANDS)
250       .Case("turks", GK_NORTHERN_ISLANDS)
251       .Case("caicos", GK_NORTHERN_ISLANDS)
252       .Case("cayman", GK_CAYMAN)
253       .Case("aruba", GK_CAYMAN)
254       .Default(GK_NONE);
255 }
256 
257 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
258   return llvm::StringSwitch<GPUKind>(Name)
259       .Case("tahiti", GK_GFX6)
260       .Case("pitcairn", GK_GFX6)
261       .Case("verde", GK_GFX6)
262       .Case("oland", GK_GFX6)
263       .Case("hainan", GK_GFX6)
264       .Case("bonaire", GK_GFX7)
265       .Case("kabini", GK_GFX7)
266       .Case("kaveri", GK_GFX7)
267       .Case("hawaii", GK_GFX7)
268       .Case("mullins", GK_GFX7)
269       .Case("gfx700", GK_GFX7)
270       .Case("gfx701", GK_GFX7)
271       .Case("gfx702", GK_GFX7)
272       .Case("tonga", GK_GFX8)
273       .Case("iceland", GK_GFX8)
274       .Case("carrizo", GK_GFX8)
275       .Case("fiji", GK_GFX8)
276       .Case("stoney", GK_GFX8)
277       .Case("polaris10", GK_GFX8)
278       .Case("polaris11", GK_GFX8)
279       .Case("gfx800", GK_GFX8)
280       .Case("gfx801", GK_GFX8)
281       .Case("gfx802", GK_GFX8)
282       .Case("gfx803", GK_GFX8)
283       .Case("gfx804", GK_GFX8)
284       .Case("gfx810", GK_GFX8)
285       .Case("gfx900", GK_GFX9)
286       .Case("gfx901", GK_GFX9)
287       .Default(GK_NONE);
288 }
289 
290 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
291   if (isGenericZero(getTriple())) {
292     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
293                                     : &AMDGPUGenIsZeroDefIsGenMap;
294   } else {
295     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
296                                     : &AMDGPUPrivIsZeroDefIsGenMap;
297   }
298 }
299 
300 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
301                                    const TargetOptions &Opts)
302     : TargetInfo(Triple), GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600),
303       hasFP64(false), hasFMAF(false), hasLDEXPF(false),
304       AS(isGenericZero(Triple)) {
305   if (getTriple().getArch() == llvm::Triple::amdgcn) {
306     hasFP64 = true;
307     hasFMAF = true;
308     hasLDEXPF = true;
309   }
310   auto IsGenericZero = isGenericZero(Triple);
311   resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
312                       ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
313                                        : DataLayoutStringSIPrivateIsZero)
314                       : DataLayoutStringR600);
315   assert(DataLayout->getAllocaAddrSpace() == AS.Private);
316 
317   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
318                      Triple.getEnvironment() == llvm::Triple::OpenCL ||
319                      Triple.getEnvironmentName() == "amdgizcl" ||
320                      !isAMDGCN(Triple));
321   UseAddrSpaceMapMangling = true;
322 
323   // Set pointer width and alignment for target address space 0.
324   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
325   if (getMaxPointerWidth() == 64) {
326     LongWidth = LongAlign = 64;
327     SizeType = UnsignedLong;
328     PtrDiffType = SignedLong;
329     IntPtrType = SignedLong;
330   }
331 }
332 
333 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
334   TargetInfo::adjust(Opts);
335   setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
336 }
337 
338 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
339   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
340                                              Builtin::FirstTSBuiltin);
341 }
342 
343 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
344                                         MacroBuilder &Builder) const {
345   if (getTriple().getArch() == llvm::Triple::amdgcn)
346     Builder.defineMacro("__AMDGCN__");
347   else
348     Builder.defineMacro("__R600__");
349 
350   if (hasFMAF)
351     Builder.defineMacro("__HAS_FMAF__");
352   if (hasLDEXPF)
353     Builder.defineMacro("__HAS_LDEXPF__");
354   if (hasFP64)
355     Builder.defineMacro("__HAS_FP64__");
356 }
357