1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
31 static const char *const DataLayoutStringR600 =
32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
35 static const char *const DataLayoutStringAMDGCN =
36     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS)                                               \
68   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
70   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119   "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   // XXX - What does the member GPU mean if device name string passed here?
131   if (isAMDGCN(getTriple())) {
132     if (CPU.empty())
133       CPU = "gfx600";
134 
135     switch (parseAMDGCNName(CPU).Kind) {
136     case GK_GFX906:
137       Features["dl-insts"] = true;
138       LLVM_FALLTHROUGH;
139     case GK_GFX904:
140     case GK_GFX902:
141     case GK_GFX900:
142       Features["gfx9-insts"] = true;
143       LLVM_FALLTHROUGH;
144     case GK_GFX810:
145     case GK_GFX803:
146     case GK_GFX802:
147     case GK_GFX801:
148       Features["16-bit-insts"] = true;
149       Features["dpp"] = true;
150       Features["s-memrealtime"] = true;
151       break;
152     case GK_GFX704:
153     case GK_GFX703:
154     case GK_GFX702:
155     case GK_GFX701:
156     case GK_GFX700:
157     case GK_GFX601:
158     case GK_GFX600:
159       break;
160     case GK_NONE:
161       return false;
162     default:
163       llvm_unreachable("Unhandled GPU!");
164     }
165   } else {
166     if (CPU.empty())
167       CPU = "r600";
168 
169     switch (parseR600Name(CPU).Kind) {
170     case GK_CAYMAN:
171     case GK_CYPRESS:
172     case GK_RV770:
173     case GK_RV670:
174       // TODO: Add fp64 when implemented.
175       break;
176     case GK_TURKS:
177     case GK_CAICOS:
178     case GK_BARTS:
179     case GK_SUMO:
180     case GK_REDWOOD:
181     case GK_JUNIPER:
182     case GK_CEDAR:
183     case GK_RV730:
184     case GK_RV710:
185     case GK_RS880:
186     case GK_R630:
187     case GK_R600:
188       break;
189     default:
190       llvm_unreachable("Unhandled GPU!");
191     }
192   }
193 
194   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
195 }
196 
197 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
198                                            TargetOptions &TargetOpts) const {
199   bool hasFP32Denormals = false;
200   bool hasFP64Denormals = false;
201   GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
202   for (auto &I : TargetOpts.FeaturesAsWritten) {
203     if (I == "+fp32-denormals" || I == "-fp32-denormals")
204       hasFP32Denormals = true;
205     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
206       hasFP64Denormals = true;
207   }
208   if (!hasFP32Denormals)
209     TargetOpts.Features.push_back(
210         (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
211                    ? '+'
212                    : '-') +
213          Twine("fp32-denormals"))
214             .str());
215   // Always do not flush fp64 or fp16 denorms.
216   if (!hasFP64Denormals && CGOptsGPU.HasFP64)
217     TargetOpts.Features.push_back("+fp64-fp16-denormals");
218 }
219 
220 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
221 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
222 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];
223 
224 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
225   const auto *Result = llvm::find_if(
226       R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
227 
228   if (Result == std::end(R600GPUs))
229     return InvalidGPU;
230   return *Result;
231 }
232 
233 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
234   const auto *Result = llvm::find_if(
235       AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
236 
237   if (Result == std::end(AMDGCNGPUs))
238     return InvalidGPU;
239   return *Result;
240 }
241 
242 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const {
243   if (isAMDGCN(getTriple()))
244     return parseAMDGCNName(Name);
245   else
246     return parseR600Name(Name);
247 }
248 
249 void AMDGPUTargetInfo::fillValidCPUList(
250     SmallVectorImpl<StringRef> &Values) const {
251   if (isAMDGCN(getTriple()))
252     llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
253                    Values.emplace_back(GPU.Name);});
254   else
255     llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
256                    Values.emplace_back(GPU.Name);});
257 }
258 
259 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
260   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
261 }
262 
263 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
264                                    const TargetOptions &Opts)
265     : TargetInfo(Triple),
266       GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
267   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
268                                         : DataLayoutStringR600);
269   assert(DataLayout->getAllocaAddrSpace() == Private);
270 
271   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
272                      !isAMDGCN(Triple));
273   UseAddrSpaceMapMangling = true;
274 
275   // Set pointer width and alignment for target address space 0.
276   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
277   if (getMaxPointerWidth() == 64) {
278     LongWidth = LongAlign = 64;
279     SizeType = UnsignedLong;
280     PtrDiffType = SignedLong;
281     IntPtrType = SignedLong;
282   }
283 
284   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
285 }
286 
287 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
288   TargetInfo::adjust(Opts);
289   // ToDo: There are still a few places using default address space as private
290   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
291   // can be removed from the following line.
292   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
293                      !isAMDGCN(getTriple()));
294 }
295 
296 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
297   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
298                                              Builtin::FirstTSBuiltin);
299 }
300 
301 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
302                                         MacroBuilder &Builder) const {
303   Builder.defineMacro("__AMD__");
304   Builder.defineMacro("__AMDGPU__");
305 
306   if (isAMDGCN(getTriple()))
307     Builder.defineMacro("__AMDGCN__");
308   else
309     Builder.defineMacro("__R600__");
310 
311   if (GPU.Kind != GK_NONE)
312     Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
313 
314   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
315   // removed in the near future.
316   if (GPU.HasFMAF)
317     Builder.defineMacro("__HAS_FMAF__");
318   if (GPU.HasFastFMAF)
319     Builder.defineMacro("FP_FAST_FMAF");
320   if (GPU.HasLDEXPF)
321     Builder.defineMacro("__HAS_LDEXPF__");
322   if (GPU.HasFP64)
323     Builder.defineMacro("__HAS_FP64__");
324   if (GPU.HasFastFMA)
325     Builder.defineMacro("FP_FAST_FMA");
326 }
327