1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// LLVM data layout description used for non-amdgcn (R600-family) triples.
// The literal is split by component kind: endianness and pointer layout,
// integer alignment, vector alignments, then native integer widths, stack
// alignment and alloca address space.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
34 
// LLVM data layout description used for amdgcn triples. Compared with the
// R600 string it uses a 64-bit default pointer and adds explicit pointer
// layouts for address spaces 1 through 6; the remaining components match.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
39 
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41     Generic,  // Default
42     Global,   // opencl_global
43     Local,    // opencl_local
44     Constant, // opencl_constant
45     Private,  // opencl_private
46     Generic,  // opencl_generic
47     Global,   // cuda_device
48     Constant, // cuda_constant
49     Local     // cuda_shared
50 };
51 
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53     Private,  // Default
54     Global,   // opencl_global
55     Local,    // opencl_local
56     Constant, // opencl_constant
57     Private,  // opencl_private
58     Generic,  // opencl_generic
59     Global,   // cuda_device
60     Constant, // cuda_constant
61     Local     // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS)                                               \
68   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
70   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119   "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   // XXX - What does the member GPU mean if device name string passed here?
131   if (isAMDGCN(getTriple())) {
132     if (CPU.empty())
133       CPU = "gfx600";
134 
135     switch (parseAMDGCNName(CPU).Kind) {
136     case GK_GFX902:
137     case GK_GFX900:
138       Features["gfx9-insts"] = true;
139       LLVM_FALLTHROUGH;
140     case GK_GFX810:
141     case GK_GFX803:
142     case GK_GFX802:
143     case GK_GFX801:
144       Features["16-bit-insts"] = true;
145       Features["dpp"] = true;
146       Features["s-memrealtime"] = true;
147       break;
148     case GK_GFX704:
149     case GK_GFX703:
150     case GK_GFX702:
151     case GK_GFX701:
152     case GK_GFX700:
153     case GK_GFX601:
154     case GK_GFX600:
155       break;
156     case GK_NONE:
157       return false;
158     default:
159       llvm_unreachable("Unhandled GPU!");
160     }
161   } else {
162     if (CPU.empty())
163       CPU = "r600";
164 
165     switch (parseR600Name(CPU).Kind) {
166     case GK_CAYMAN:
167     case GK_CYPRESS:
168     case GK_RV770:
169     case GK_RV670:
170       // TODO: Add fp64 when implemented.
171       break;
172     case GK_TURKS:
173     case GK_CAICOS:
174     case GK_BARTS:
175     case GK_SUMO:
176     case GK_REDWOOD:
177     case GK_JUNIPER:
178     case GK_CEDAR:
179     case GK_RV730:
180     case GK_RV710:
181     case GK_RS880:
182     case GK_R630:
183     case GK_R600:
184       break;
185     default:
186       llvm_unreachable("Unhandled GPU!");
187     }
188   }
189 
190   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
191 }
192 
193 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
194                                            TargetOptions &TargetOpts) const {
195   bool hasFP32Denormals = false;
196   bool hasFP64Denormals = false;
197   GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
198   for (auto &I : TargetOpts.FeaturesAsWritten) {
199     if (I == "+fp32-denormals" || I == "-fp32-denormals")
200       hasFP32Denormals = true;
201     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
202       hasFP64Denormals = true;
203   }
204   if (!hasFP32Denormals)
205     TargetOpts.Features.push_back(
206         (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
207                    ? '+'
208                    : '-') +
209          Twine("fp32-denormals"))
210             .str());
211   // Always do not flush fp64 or fp16 denorms.
212   if (!hasFP64Denormals && CGOptsGPU.HasFP64)
213     TargetOpts.Features.push_back("+fp64-fp16-denormals");
214 }
215 
216 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
217 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
218 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];
219 
220 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
221   const auto *Result = llvm::find_if(
222       R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
223 
224   if (Result == std::end(R600GPUs))
225     return InvalidGPU;
226   return *Result;
227 }
228 
229 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
230   const auto *Result = llvm::find_if(
231       AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
232 
233   if (Result == std::end(AMDGCNGPUs))
234     return InvalidGPU;
235   return *Result;
236 }
237 
238 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const {
239   if (isAMDGCN(getTriple()))
240     return parseAMDGCNName(Name);
241   else
242     return parseR600Name(Name);
243 }
244 
245 void AMDGPUTargetInfo::fillValidCPUList(
246     SmallVectorImpl<StringRef> &Values) const {
247   if (isAMDGCN(getTriple()))
248     llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
249                    Values.emplace_back(GPU.Name);});
250   else
251     llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
252                    Values.emplace_back(GPU.Name);});
253 }
254 
255 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
256   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
257 }
258 
259 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
260                                    const TargetOptions &Opts)
261     : TargetInfo(Triple),
262       GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
263   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
264                                         : DataLayoutStringR600);
265   assert(DataLayout->getAllocaAddrSpace() == Private);
266 
267   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
268                      !isAMDGCN(Triple));
269   UseAddrSpaceMapMangling = true;
270 
271   // Set pointer width and alignment for target address space 0.
272   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
273   if (getMaxPointerWidth() == 64) {
274     LongWidth = LongAlign = 64;
275     SizeType = UnsignedLong;
276     PtrDiffType = SignedLong;
277     IntPtrType = SignedLong;
278   }
279 
280   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
281 }
282 
283 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
284   TargetInfo::adjust(Opts);
285   // ToDo: There are still a few places using default address space as private
286   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
287   // can be removed from the following line.
288   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
289                      !isAMDGCN(getTriple()));
290 }
291 
292 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
293   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
294                                              Builtin::FirstTSBuiltin);
295 }
296 
297 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
298                                         MacroBuilder &Builder) const {
299   Builder.defineMacro("__AMD__");
300   Builder.defineMacro("__AMDGPU__");
301 
302   if (isAMDGCN(getTriple()))
303     Builder.defineMacro("__AMDGCN__");
304   else
305     Builder.defineMacro("__R600__");
306 
307   if (GPU.Kind != GK_NONE)
308     Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
309 
310   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
311   // removed in the near future.
312   if (GPU.HasFMAF)
313     Builder.defineMacro("__HAS_FMAF__");
314   if (GPU.HasFastFMAF)
315     Builder.defineMacro("FP_FAST_FMAF");
316   if (GPU.HasLDEXPF)
317     Builder.defineMacro("__HAS_LDEXPF__");
318   if (GPU.HasFP64)
319     Builder.defineMacro("__HAS_FP64__");
320   if (GPU.HasFastFMA)
321     Builder.defineMacro("FP_FAST_FMA");
322 }
323