1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// Data layout description handed to resetDataLayout() by the
// AMDGPUTargetInfo constructor when the triple's architecture is not amdgcn.
// The literal is split by component kind (endianness/pointers, integers,
// vectors, native integer widths); the pieces concatenate to one string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64";
34 
// Data layout description used by the AMDGPUTargetInfo constructor for
// amdgcn triples for which isGenericZero() is false. It lists explicit
// pointer sizes for address spaces 1 through 5 in addition to the default
// one; the pieces below concatenate to a single string.
static const char *const DataLayoutStringSIPrivateIsZero =
    "e"
    "-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64";
39 
// Data layout description used by the AMDGPUTargetInfo constructor for
// amdgcn triples for which isGenericZero() is true. Besides the per-address-
// space pointer sizes it ends in "-A5", and the constructor asserts that the
// resulting alloca address space equals AS.Private. The pieces below
// concatenate to a single string.
static const char *const DataLayoutStringSIGenericIsZero =
    "e"
    "-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64"
    "-A5";
44 
45 static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = {
46     4, // Default
47     1, // opencl_global
48     3, // opencl_local
49     2, // opencl_constant
50     4, // opencl_generic
51     1, // cuda_device
52     2, // cuda_constant
53     3  // cuda_shared
54 };
55 
56 static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = {
57     0, // Default
58     1, // opencl_global
59     3, // opencl_local
60     2, // opencl_constant
61     0, // opencl_generic
62     1, // cuda_device
63     2, // cuda_constant
64     3  // cuda_shared
65 };
66 
67 static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = {
68     0, // Default
69     1, // opencl_global
70     3, // opencl_local
71     2, // opencl_constant
72     4, // opencl_generic
73     1, // cuda_device
74     2, // cuda_constant
75     3  // cuda_shared
76 };
77 
78 static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = {
79     5, // Default
80     1, // opencl_global
81     3, // opencl_local
82     2, // opencl_constant
83     0, // opencl_generic
84     1, // cuda_device
85     2, // cuda_constant
86     3  // cuda_shared
87 };
88 } // namespace targets
89 } // namespace clang
90 
91 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
92 #define BUILTIN(ID, TYPE, ATTRS)                                               \
93   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
94 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
95   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
96 #include "clang/Basic/BuiltinsAMDGPU.def"
97 };
98 
99 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
100   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
101   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
102   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
103   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
104   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
105   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
106   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
107   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
108   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
109   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
110   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
111   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
112   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
113   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
114   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
115   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
116   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
117   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
118   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
119   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
120   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
121   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
122   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
123   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
124   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
125   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
126   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
127   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
128   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
129   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
130   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
131   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
132   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
133   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
134   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
135   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
136   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
137   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
138   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
139   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
140   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
141   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
142   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
143   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
144   "flat_scratch_lo", "flat_scratch_hi"
145 };
146 
147 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
148   return llvm::makeArrayRef(GCCRegNames);
149 }
150 
151 bool AMDGPUTargetInfo::initFeatureMap(
152     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
153     const std::vector<std::string> &FeatureVec) const {
154 
155   // XXX - What does the member GPU mean if device name string passed here?
156   if (getTriple().getArch() == llvm::Triple::amdgcn) {
157     if (CPU.empty())
158       CPU = "tahiti";
159 
160     switch (parseAMDGCNName(CPU)) {
161     case GK_GFX6:
162     case GK_GFX7:
163       break;
164 
165     case GK_GFX9:
166       Features["gfx9-insts"] = true;
167       LLVM_FALLTHROUGH;
168     case GK_GFX8:
169       Features["s-memrealtime"] = true;
170       Features["16-bit-insts"] = true;
171       Features["dpp"] = true;
172       break;
173 
174     case GK_NONE:
175       return false;
176     default:
177       llvm_unreachable("unhandled subtarget");
178     }
179   } else {
180     if (CPU.empty())
181       CPU = "r600";
182 
183     switch (parseR600Name(CPU)) {
184     case GK_R600:
185     case GK_R700:
186     case GK_EVERGREEN:
187     case GK_NORTHERN_ISLANDS:
188       break;
189     case GK_R600_DOUBLE_OPS:
190     case GK_R700_DOUBLE_OPS:
191     case GK_EVERGREEN_DOUBLE_OPS:
192     case GK_CAYMAN:
193       Features["fp64"] = true;
194       break;
195     case GK_NONE:
196       return false;
197     default:
198       llvm_unreachable("unhandled subtarget");
199     }
200   }
201 
202   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
203 }
204 
205 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
206                                            TargetOptions &TargetOpts) const {
207   bool hasFP32Denormals = false;
208   bool hasFP64Denormals = false;
209   for (auto &I : TargetOpts.FeaturesAsWritten) {
210     if (I == "+fp32-denormals" || I == "-fp32-denormals")
211       hasFP32Denormals = true;
212     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
213       hasFP64Denormals = true;
214   }
215   if (!hasFP32Denormals)
216     TargetOpts.Features.push_back(
217         (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
218                    ? '+'
219                    : '-') +
220          Twine("fp32-denormals"))
221             .str());
222   // Always do not flush fp64 or fp16 denorms.
223   if (!hasFP64Denormals && hasFP64)
224     TargetOpts.Features.push_back("+fp64-fp16-denormals");
225 }
226 
227 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
228   return llvm::StringSwitch<GPUKind>(Name)
229       .Case("r600", GK_R600)
230       .Case("rv610", GK_R600)
231       .Case("rv620", GK_R600)
232       .Case("rv630", GK_R600)
233       .Case("rv635", GK_R600)
234       .Case("rs780", GK_R600)
235       .Case("rs880", GK_R600)
236       .Case("rv670", GK_R600_DOUBLE_OPS)
237       .Case("rv710", GK_R700)
238       .Case("rv730", GK_R700)
239       .Case("rv740", GK_R700_DOUBLE_OPS)
240       .Case("rv770", GK_R700_DOUBLE_OPS)
241       .Case("palm", GK_EVERGREEN)
242       .Case("cedar", GK_EVERGREEN)
243       .Case("sumo", GK_EVERGREEN)
244       .Case("sumo2", GK_EVERGREEN)
245       .Case("redwood", GK_EVERGREEN)
246       .Case("juniper", GK_EVERGREEN)
247       .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS)
248       .Case("cypress", GK_EVERGREEN_DOUBLE_OPS)
249       .Case("barts", GK_NORTHERN_ISLANDS)
250       .Case("turks", GK_NORTHERN_ISLANDS)
251       .Case("caicos", GK_NORTHERN_ISLANDS)
252       .Case("cayman", GK_CAYMAN)
253       .Case("aruba", GK_CAYMAN)
254       .Default(GK_NONE);
255 }
256 
257 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
258   return llvm::StringSwitch<GPUKind>(Name)
259       .Case("gfx600", GK_GFX6)
260       .Case("tahiti", GK_GFX6)
261       .Case("gfx601", GK_GFX6)
262       .Case("pitcairn", GK_GFX6)
263       .Case("verde", GK_GFX6)
264       .Case("oland", GK_GFX6)
265       .Case("hainan", GK_GFX6)
266       .Case("gfx700", GK_GFX7)
267       .Case("bonaire", GK_GFX7)
268       .Case("kaveri", GK_GFX7)
269       .Case("gfx701", GK_GFX7)
270       .Case("hawaii", GK_GFX7)
271       .Case("gfx702", GK_GFX7)
272       .Case("gfx703", GK_GFX7)
273       .Case("kabini", GK_GFX7)
274       .Case("mullins", GK_GFX7)
275       .Case("gfx800", GK_GFX8)
276       .Case("iceland", GK_GFX8)
277       .Case("gfx801", GK_GFX8)
278       .Case("carrizo", GK_GFX8)
279       .Case("gfx802", GK_GFX8)
280       .Case("tonga", GK_GFX8)
281       .Case("gfx803", GK_GFX8)
282       .Case("fiji", GK_GFX8)
283       .Case("polaris10", GK_GFX8)
284       .Case("polaris11", GK_GFX8)
285       .Case("gfx804", GK_GFX8)
286       .Case("gfx810", GK_GFX8)
287       .Case("stoney", GK_GFX8)
288       .Case("gfx900", GK_GFX9)
289       .Case("gfx901", GK_GFX9)
290       .Case("gfx902", GK_GFX9)
291       .Case("gfx903", GK_GFX9)
292       .Default(GK_NONE);
293 }
294 
295 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
296   if (isGenericZero(getTriple())) {
297     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
298                                     : &AMDGPUGenIsZeroDefIsGenMap;
299   } else {
300     AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
301                                     : &AMDGPUPrivIsZeroDefIsGenMap;
302   }
303 }
304 
305 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
306                                    const TargetOptions &Opts)
307     : TargetInfo(Triple), GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600),
308       hasFP64(false), hasFMAF(false), hasLDEXPF(false),
309       AS(isGenericZero(Triple)) {
310   if (getTriple().getArch() == llvm::Triple::amdgcn) {
311     hasFP64 = true;
312     hasFMAF = true;
313     hasLDEXPF = true;
314   }
315   auto IsGenericZero = isGenericZero(Triple);
316   resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
317                       ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
318                                        : DataLayoutStringSIPrivateIsZero)
319                       : DataLayoutStringR600);
320   assert(DataLayout->getAllocaAddrSpace() == AS.Private);
321 
322   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
323                      Triple.getEnvironment() == llvm::Triple::OpenCL ||
324                      Triple.getEnvironmentName() == "amdgizcl" ||
325                      !isAMDGCN(Triple));
326   UseAddrSpaceMapMangling = true;
327 
328   // Set pointer width and alignment for target address space 0.
329   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
330   if (getMaxPointerWidth() == 64) {
331     LongWidth = LongAlign = 64;
332     SizeType = UnsignedLong;
333     PtrDiffType = SignedLong;
334     IntPtrType = SignedLong;
335   }
336 
337   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
338 }
339 
340 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
341   TargetInfo::adjust(Opts);
342   setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
343 }
344 
345 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
346   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
347                                              Builtin::FirstTSBuiltin);
348 }
349 
350 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
351                                         MacroBuilder &Builder) const {
352   if (getTriple().getArch() == llvm::Triple::amdgcn)
353     Builder.defineMacro("__AMDGCN__");
354   else
355     Builder.defineMacro("__R600__");
356 
357   if (hasFMAF)
358     Builder.defineMacro("__HAS_FMAF__");
359   if (hasLDEXPF)
360     Builder.defineMacro("__HAS_LDEXPF__");
361   if (hasFP64)
362     Builder.defineMacro("__HAS_FP64__");
363 }
364