1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "Targets.h"
16 #include "clang/Basic/Builtins.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "clang/Frontend/CodeGenOptions.h"
21 #include "llvm/ADT/StringSwitch.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
32 static const char *const DataLayoutStringR600 =
33     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
34     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
35 
36 static const char *const DataLayoutStringAMDGCN =
37     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
38     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
39     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // cuda_device
49     Constant, // cuda_constant
50     Local     // cuda_shared
51 };
52 
53 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
54     Private,  // Default
55     Global,   // opencl_global
56     Local,    // opencl_local
57     Constant, // opencl_constant
58     Private,  // opencl_private
59     Generic,  // opencl_generic
60     Global,   // cuda_device
61     Constant, // cuda_constant
62     Local     // cuda_shared
63 };
64 } // namespace targets
65 } // namespace clang
66 
67 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
68 #define BUILTIN(ID, TYPE, ATTRS)                                               \
69   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
70 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
71   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
72 #include "clang/Basic/BuiltinsAMDGPU.def"
73 };
74 
75 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
76   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
77   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
78   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
79   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
80   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
81   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
82   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
83   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
84   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
85   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
86   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
87   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
88   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
89   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
90   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
91   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
92   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
93   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
94   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
95   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
96   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
97   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
98   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
99   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
100   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
101   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
102   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
103   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
104   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
105   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
106   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
107   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
108   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
109   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
110   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
111   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
112   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
113   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
114   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
115   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
116   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
117   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
118   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
119   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
120   "flat_scratch_lo", "flat_scratch_hi"
121 };
122 
123 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
124   return llvm::makeArrayRef(GCCRegNames);
125 }
126 
127 bool AMDGPUTargetInfo::initFeatureMap(
128     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
129     const std::vector<std::string> &FeatureVec) const {
130 
131   // XXX - What does the member GPU mean if device name string passed here?
132   if (isAMDGCN(getTriple())) {
133     if (CPU.empty())
134       CPU = "gfx600";
135 
136     switch (parseAMDGCNName(CPU).Kind) {
137     case GK_GFX902:
138     case GK_GFX900:
139       Features["gfx9-insts"] = true;
140       LLVM_FALLTHROUGH;
141     case GK_GFX810:
142     case GK_GFX803:
143     case GK_GFX802:
144     case GK_GFX801:
145       Features["16-bit-insts"] = true;
146       Features["dpp"] = true;
147       Features["s-memrealtime"] = true;
148       break;
149     case GK_GFX704:
150     case GK_GFX703:
151     case GK_GFX702:
152     case GK_GFX701:
153     case GK_GFX700:
154     case GK_GFX601:
155     case GK_GFX600:
156       break;
157     case GK_NONE:
158       return false;
159     default:
160       llvm_unreachable("Unhandled GPU!");
161     }
162   } else {
163     if (CPU.empty())
164       CPU = "r600";
165 
166     switch (parseR600Name(CPU).Kind) {
167     case GK_CAYMAN:
168     case GK_CYPRESS:
169     case GK_RV770:
170     case GK_RV670:
171       // TODO: Add fp64 when implemented.
172       break;
173     case GK_TURKS:
174     case GK_CAICOS:
175     case GK_BARTS:
176     case GK_SUMO:
177     case GK_REDWOOD:
178     case GK_JUNIPER:
179     case GK_CEDAR:
180     case GK_RV730:
181     case GK_RV710:
182     case GK_RS880:
183     case GK_R630:
184     case GK_R600:
185       break;
186     default:
187       llvm_unreachable("Unhandled GPU!");
188     }
189   }
190 
191   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
192 }
193 
194 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
195                                            TargetOptions &TargetOpts) const {
196   bool hasFP32Denormals = false;
197   bool hasFP64Denormals = false;
198   GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
199   for (auto &I : TargetOpts.FeaturesAsWritten) {
200     if (I == "+fp32-denormals" || I == "-fp32-denormals")
201       hasFP32Denormals = true;
202     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
203       hasFP64Denormals = true;
204   }
205   if (!hasFP32Denormals)
206     TargetOpts.Features.push_back(
207         (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
208                    ? '+'
209                    : '-') +
210          Twine("fp32-denormals"))
211             .str());
212   // Always do not flush fp64 or fp16 denorms.
213   if (!hasFP64Denormals && CGOptsGPU.HasFP64)
214     TargetOpts.Features.push_back("+fp64-fp16-denormals");
215 }
216 
217 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
218 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
219 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];
220 
221 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
222   const auto *Result = llvm::find_if(
223       R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
224 
225   if (Result == std::end(R600GPUs))
226     return InvalidGPU;
227   return *Result;
228 }
229 
230 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
231   const auto *Result = llvm::find_if(
232       AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
233 
234   if (Result == std::end(AMDGCNGPUs))
235     return InvalidGPU;
236   return *Result;
237 }
238 
239 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const {
240   if (isAMDGCN(getTriple()))
241     return parseAMDGCNName(Name);
242   else
243     return parseR600Name(Name);
244 }
245 
246 void AMDGPUTargetInfo::fillValidCPUList(
247     SmallVectorImpl<StringRef> &Values) const {
248   if (isAMDGCN(getTriple()))
249     llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
250                    Values.emplace_back(GPU.Name);});
251   else
252     llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
253                    Values.emplace_back(GPU.Name);});
254 }
255 
256 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
257   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
258 }
259 
260 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
261                                    const TargetOptions &Opts)
262     : TargetInfo(Triple),
263       GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
264   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
265                                         : DataLayoutStringR600);
266   assert(DataLayout->getAllocaAddrSpace() == Private);
267   GCN_Subarch = CudaArch::GFX803; // Default to fiji
268 
269   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
270                      !isAMDGCN(Triple));
271   UseAddrSpaceMapMangling = true;
272 
273   // Set pointer width and alignment for target address space 0.
274   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
275   if (getMaxPointerWidth() == 64) {
276     LongWidth = LongAlign = 64;
277     SizeType = UnsignedLong;
278     PtrDiffType = SignedLong;
279     IntPtrType = SignedLong;
280   }
281 
282   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
283 }
284 
285 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
286   TargetInfo::adjust(Opts);
287   // ToDo: There are still a few places using default address space as private
288   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
289   // can be removed from the following line.
290   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
291                      !isAMDGCN(getTriple()));
292 }
293 
294 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
295   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
296                                              Builtin::FirstTSBuiltin);
297 }
298 
299 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
300                                         MacroBuilder &Builder) const {
301   Builder.defineMacro("__AMD__");
302   Builder.defineMacro("__AMDGPU__");
303 
304   if (isAMDGCN(getTriple()))
305     Builder.defineMacro("__AMDGCN__");
306   else
307     Builder.defineMacro("__R600__");
308 
309   if (GPU.Kind != GK_NONE)
310     Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
311 
312   if (Opts.CUDAIsDevice)
313     defineCudaArchMacro(GCN_Subarch, Builder);
314 
315   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
316   // removed in the near future.
317   if (GPU.HasFMAF)
318     Builder.defineMacro("__HAS_FMAF__");
319   if (GPU.HasFastFMAF)
320     Builder.defineMacro("FP_FAST_FMAF");
321   if (GPU.HasLDEXPF)
322     Builder.defineMacro("__HAS_LDEXPF__");
323   if (GPU.HasFP64)
324     Builder.defineMacro("__HAS_FP64__");
325   if (GPU.HasFastFMA)
326     Builder.defineMacro("FP_FAST_FMA");
327 }
328