1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// LLVM data layout description that the AMDGPUTargetInfo constructor passes to
// resetDataLayout() when the triple is not amdgcn.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// LLVM data layout description that the AMDGPUTargetInfo constructor passes to
// resetDataLayout() when the triple is amdgcn.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Address space map selected by setAddressSpaceMap(false): the Default
// language address space maps to Generic. Entries are positional; the trailing
// comment on each line names the language address space that slot stands for.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};

// Address space map selected by setAddressSpaceMap(true): identical to
// AMDGPUDefIsGenMap except that the Default language address space maps to
// Private instead of Generic.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
} // namespace targets
} // namespace clang
65 
// Table of AMDGPU builtin descriptors. Each BUILTIN / TARGET_BUILTIN entry of
// BuiltinsAMDGPU.def is expanded into one Builtin::Info initializer. BUILTIN
// entries put a null pointer in the last field, which is where TARGET_BUILTIN
// entries place their FEATURE argument. getTargetBuiltins() exposes this
// array to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
73 
// Register names returned by getGCCRegNames(): v0 through v255, then s0
// through s127, followed by exec, vcc, scc, m0, flat_scratch and the _lo/_hi
// variants of exec, vcc and flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   // XXX - What does the member GPU mean if device name string passed here?
131   if (isAMDGCN(getTriple())) {
132     if (CPU.empty())
133       CPU = "gfx600";
134 
135     switch (parseAMDGCNName(CPU).Kind) {
136     case GK_GFX906:
137       Features["dl-insts"] = true;
138       LLVM_FALLTHROUGH;
139     case GK_GFX904:
140     case GK_GFX902:
141     case GK_GFX900:
142       Features["gfx9-insts"] = true;
143       LLVM_FALLTHROUGH;
144     case GK_GFX810:
145     case GK_GFX803:
146     case GK_GFX802:
147     case GK_GFX801:
148       Features["vi-insts"] = true;
149       Features["16-bit-insts"] = true;
150       Features["dpp"] = true;
151       Features["s-memrealtime"] = true;
152       LLVM_FALLTHROUGH;
153     case GK_GFX704:
154     case GK_GFX703:
155     case GK_GFX702:
156     case GK_GFX701:
157     case GK_GFX700:
158       Features["ci-insts"] = true;
159       LLVM_FALLTHROUGH;
160     case GK_GFX601:
161     case GK_GFX600:
162       break;
163     case GK_NONE:
164       return false;
165     default:
166       llvm_unreachable("Unhandled GPU!");
167     }
168   } else {
169     if (CPU.empty())
170       CPU = "r600";
171 
172     switch (parseR600Name(CPU).Kind) {
173     case GK_CAYMAN:
174     case GK_CYPRESS:
175     case GK_RV770:
176     case GK_RV670:
177       // TODO: Add fp64 when implemented.
178       break;
179     case GK_TURKS:
180     case GK_CAICOS:
181     case GK_BARTS:
182     case GK_SUMO:
183     case GK_REDWOOD:
184     case GK_JUNIPER:
185     case GK_CEDAR:
186     case GK_RV730:
187     case GK_RV710:
188     case GK_RS880:
189     case GK_R630:
190     case GK_R600:
191       break;
192     default:
193       llvm_unreachable("Unhandled GPU!");
194     }
195   }
196 
197   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
198 }
199 
200 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
201                                            TargetOptions &TargetOpts) const {
202   bool hasFP32Denormals = false;
203   bool hasFP64Denormals = false;
204   GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
205   for (auto &I : TargetOpts.FeaturesAsWritten) {
206     if (I == "+fp32-denormals" || I == "-fp32-denormals")
207       hasFP32Denormals = true;
208     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
209       hasFP64Denormals = true;
210   }
211   if (!hasFP32Denormals)
212     TargetOpts.Features.push_back(
213         (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
214                    ? '+'
215                    : '-') +
216          Twine("fp32-denormals"))
217             .str());
218   // Always do not flush fp64 or fp16 denorms.
219   if (!hasFP64Denormals && CGOptsGPU.HasFP64)
220     TargetOpts.Features.push_back("+fp64-fp16-denormals");
221 }
222 
// Namespace-scope definitions of the constexpr static data members. No
// initializers appear here, so they are presumably supplied in the class
// declaration in AMDGPU.h (not visible from this file). Before C++17 these
// definitions are required because the members are odr-used below.
constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];
226 
227 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
228   const auto *Result = llvm::find_if(
229       R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
230 
231   if (Result == std::end(R600GPUs))
232     return InvalidGPU;
233   return *Result;
234 }
235 
236 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
237   const auto *Result = llvm::find_if(
238       AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
239 
240   if (Result == std::end(AMDGCNGPUs))
241     return InvalidGPU;
242   return *Result;
243 }
244 
245 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const {
246   if (isAMDGCN(getTriple()))
247     return parseAMDGCNName(Name);
248   else
249     return parseR600Name(Name);
250 }
251 
252 void AMDGPUTargetInfo::fillValidCPUList(
253     SmallVectorImpl<StringRef> &Values) const {
254   if (isAMDGCN(getTriple()))
255     llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
256                    Values.emplace_back(GPU.Name);});
257   else
258     llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
259                    Values.emplace_back(GPU.Name);});
260 }
261 
262 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
263   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
264 }
265 
266 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
267                                    const TargetOptions &Opts)
268     : TargetInfo(Triple),
269       GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
270   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
271                                         : DataLayoutStringR600);
272   assert(DataLayout->getAllocaAddrSpace() == Private);
273 
274   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
275                      !isAMDGCN(Triple));
276   UseAddrSpaceMapMangling = true;
277 
278   // Set pointer width and alignment for target address space 0.
279   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
280   if (getMaxPointerWidth() == 64) {
281     LongWidth = LongAlign = 64;
282     SizeType = UnsignedLong;
283     PtrDiffType = SignedLong;
284     IntPtrType = SignedLong;
285   }
286 
287   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
288 }
289 
290 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
291   TargetInfo::adjust(Opts);
292   // ToDo: There are still a few places using default address space as private
293   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
294   // can be removed from the following line.
295   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
296                      !isAMDGCN(getTriple()));
297 }
298 
299 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
300   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
301                                              Builtin::FirstTSBuiltin);
302 }
303 
304 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
305                                         MacroBuilder &Builder) const {
306   Builder.defineMacro("__AMD__");
307   Builder.defineMacro("__AMDGPU__");
308 
309   if (isAMDGCN(getTriple()))
310     Builder.defineMacro("__AMDGCN__");
311   else
312     Builder.defineMacro("__R600__");
313 
314   if (GPU.Kind != GK_NONE)
315     Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
316 
317   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
318   // removed in the near future.
319   if (GPU.HasFMAF)
320     Builder.defineMacro("__HAS_FMAF__");
321   if (GPU.HasFastFMAF)
322     Builder.defineMacro("FP_FAST_FMAF");
323   if (GPU.HasLDEXPF)
324     Builder.defineMacro("__HAS_LDEXPF__");
325   if (GPU.HasFP64)
326     Builder.defineMacro("__HAS_FP64__");
327   if (GPU.HasFastFMA)
328     Builder.defineMacro("FP_FAST_FMA");
329 }
330