1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "clang/Frontend/CodeGenOptions.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// LLVM data layout string passed to resetDataLayout() by the constructor when
// the triple is not amdgcn. Default pointers are 32 bits wide ("p:32:32").
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// LLVM data layout string passed to resetDataLayout() by the constructor when
// the triple is amdgcn. Default pointers are 64 bits wide ("p:64:64"), with
// per-address-space pointer sizes given for address spaces 1 through 6.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Address space map installed by setAddressSpaceMap(false): the language-level
// default address space maps to Generic. Each entry is annotated with the
// language address space it corresponds to; the entries are positional, so
// their order must not change.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};

// Address space map installed by setAddressSpaceMap(true): identical to
// AMDGPUDefIsGenMap except that the language-level default address space maps
// to Private.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
} // namespace targets
} // namespace clang
65 
// Table of AMDGPU builtins, produced by expanding every BUILTIN and
// TARGET_BUILTIN entry of BuiltinsAMDGPU.def into a Builtin::Info initializer.
// All entries are registered for ALL_LANGUAGES. The final field is the FEATURE
// string for TARGET_BUILTIN entries and nullptr for plain BUILTIN entries.
// getTargetBuiltins() exposes this table to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
73 
// Register names returned by getGCCRegNames(). The table lists v0-v255, then
// s0-s127, then the named registers exec, vcc, scc, m0 and flat_scratch,
// followed by the _lo/_hi halves of exec, vcc and flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
121 
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123   return llvm::makeArrayRef(GCCRegNames);
124 }
125 
126 bool AMDGPUTargetInfo::initFeatureMap(
127     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128     const std::vector<std::string> &FeatureVec) const {
129 
130   using namespace llvm::AMDGPU;
131 
132   // XXX - What does the member GPU mean if device name string passed here?
133   if (isAMDGCN(getTriple())) {
134     if (CPU.empty())
135       CPU = "gfx600";
136 
137     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138     case GK_GFX906:
139       Features["dl-insts"] = true;
140       LLVM_FALLTHROUGH;
141     case GK_GFX904:
142     case GK_GFX902:
143     case GK_GFX900:
144       Features["gfx9-insts"] = true;
145       LLVM_FALLTHROUGH;
146     case GK_GFX810:
147     case GK_GFX803:
148     case GK_GFX802:
149     case GK_GFX801:
150       Features["vi-insts"] = true;
151       Features["16-bit-insts"] = true;
152       Features["dpp"] = true;
153       Features["s-memrealtime"] = true;
154       LLVM_FALLTHROUGH;
155     case GK_GFX704:
156     case GK_GFX703:
157     case GK_GFX702:
158     case GK_GFX701:
159     case GK_GFX700:
160       Features["ci-insts"] = true;
161       LLVM_FALLTHROUGH;
162     case GK_GFX601:
163     case GK_GFX600:
164       break;
165     case GK_NONE:
166       return false;
167     default:
168       llvm_unreachable("Unhandled GPU!");
169     }
170   } else {
171     if (CPU.empty())
172       CPU = "r600";
173 
174     switch (llvm::AMDGPU::parseArchR600(CPU)) {
175     case GK_CAYMAN:
176     case GK_CYPRESS:
177     case GK_RV770:
178     case GK_RV670:
179       // TODO: Add fp64 when implemented.
180       break;
181     case GK_TURKS:
182     case GK_CAICOS:
183     case GK_BARTS:
184     case GK_SUMO:
185     case GK_REDWOOD:
186     case GK_JUNIPER:
187     case GK_CEDAR:
188     case GK_RV730:
189     case GK_RV710:
190     case GK_RS880:
191     case GK_R630:
192     case GK_R600:
193       break;
194     default:
195       llvm_unreachable("Unhandled GPU!");
196     }
197   }
198 
199   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
200 }
201 
202 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
203                                            TargetOptions &TargetOpts) const {
204   bool hasFP32Denormals = false;
205   bool hasFP64Denormals = false;
206 
207   for (auto &I : TargetOpts.FeaturesAsWritten) {
208     if (I == "+fp32-denormals" || I == "-fp32-denormals")
209       hasFP32Denormals = true;
210     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
211       hasFP64Denormals = true;
212   }
213   if (!hasFP32Denormals)
214     TargetOpts.Features.push_back(
215       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
216              ? '+' : '-') + Twine("fp32-denormals"))
217             .str());
218   // Always do not flush fp64 or fp16 denorms.
219   if (!hasFP64Denormals && hasFP64())
220     TargetOpts.Features.push_back("+fp64-fp16-denormals");
221 }
222 
223 void AMDGPUTargetInfo::fillValidCPUList(
224     SmallVectorImpl<StringRef> &Values) const {
225   if (isAMDGCN(getTriple()))
226     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
227   else
228     llvm::AMDGPU::fillValidArchListR600(Values);
229 }
230 
231 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
232   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
233 }
234 
235 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
236                                    const TargetOptions &Opts)
237     : TargetInfo(Triple),
238       GPUKind(isAMDGCN(Triple) ?
239               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
240               llvm::AMDGPU::parseArchR600(Opts.CPU)),
241       GPUFeatures(isAMDGCN(Triple) ?
242                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
243                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
244   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
245                                         : DataLayoutStringR600);
246   assert(DataLayout->getAllocaAddrSpace() == Private);
247 
248   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
249                      !isAMDGCN(Triple));
250   UseAddrSpaceMapMangling = true;
251 
252   // Set pointer width and alignment for target address space 0.
253   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
254   if (getMaxPointerWidth() == 64) {
255     LongWidth = LongAlign = 64;
256     SizeType = UnsignedLong;
257     PtrDiffType = SignedLong;
258     IntPtrType = SignedLong;
259   }
260 
261   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
262 }
263 
264 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
265   TargetInfo::adjust(Opts);
266   // ToDo: There are still a few places using default address space as private
267   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
268   // can be removed from the following line.
269   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
270                      !isAMDGCN(getTriple()));
271 }
272 
273 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
274   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
275                                              Builtin::FirstTSBuiltin);
276 }
277 
278 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
279                                         MacroBuilder &Builder) const {
280   Builder.defineMacro("__AMD__");
281   Builder.defineMacro("__AMDGPU__");
282 
283   if (isAMDGCN(getTriple()))
284     Builder.defineMacro("__AMDGCN__");
285   else
286     Builder.defineMacro("__R600__");
287 
288   if (GPUKind != llvm::AMDGPU::GK_NONE) {
289     StringRef CanonName = isAMDGCN(getTriple()) ?
290       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
291     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
292   }
293 
294   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
295   // removed in the near future.
296   if (hasFMAF())
297     Builder.defineMacro("__HAS_FMAF__");
298   if (hasFastFMAF())
299     Builder.defineMacro("FP_FAST_FMAF");
300   if (hasLDEXPF())
301     Builder.defineMacro("__HAS_LDEXPF__");
302   if (hasFP64())
303     Builder.defineMacro("__HAS_FP64__");
304   if (hasFastFMA())
305     Builder.defineMacro("FP_FAST_FMA");
306 }
307