1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/CodeGenOptions.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "llvm/ADT/StringSwitch.h"
21
22 using namespace clang;
23 using namespace clang::targets;
24
25 namespace clang {
26 namespace targets {
27
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30
31 static const char *const DataLayoutStringR600 =
32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34
/// LLVM data layout description used when the target triple is AMDGCN.
/// Compared with the R600 string it declares 64-bit pointers for the default
/// address space and gives an explicit pointer size for address spaces 1-6
/// (64-bit for 1 and 4; 32-bit for 2, 3, 5 and 6). The remaining components
/// (integer/vector alignment, native integer widths, stack alignment and the
/// alloca address space) are identical to the R600 string.
static const char *const DataLayoutStringAMDGCN =
    "e"
    "-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64"
    "-S32"
    "-A5";
39
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41 Generic, // Default
42 Global, // opencl_global
43 Local, // opencl_local
44 Constant, // opencl_constant
45 Private, // opencl_private
46 Generic, // opencl_generic
47 Global, // cuda_device
48 Constant, // cuda_constant
49 Local // cuda_shared
50 };
51
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53 Private, // Default
54 Global, // opencl_global
55 Local, // opencl_local
56 Constant, // opencl_constant
57 Private, // opencl_private
58 Generic, // opencl_generic
59 Global, // cuda_device
60 Constant, // cuda_constant
61 Local // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS) \
68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119 "flat_scratch_lo", "flat_scratch_hi"
120 };
121
getGCCRegNames() const122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123 return llvm::makeArrayRef(GCCRegNames);
124 }
125
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const126 bool AMDGPUTargetInfo::initFeatureMap(
127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128 const std::vector<std::string> &FeatureVec) const {
129
130 using namespace llvm::AMDGPU;
131
132 // XXX - What does the member GPU mean if device name string passed here?
133 if (isAMDGCN(getTriple())) {
134 if (CPU.empty())
135 CPU = "gfx600";
136
137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138 case GK_GFX906:
139 Features["dl-insts"] = true;
140 Features["dot-insts"] = true;
141 LLVM_FALLTHROUGH;
142 case GK_GFX909:
143 case GK_GFX904:
144 case GK_GFX902:
145 case GK_GFX900:
146 Features["gfx9-insts"] = true;
147 LLVM_FALLTHROUGH;
148 case GK_GFX810:
149 case GK_GFX803:
150 case GK_GFX802:
151 case GK_GFX801:
152 Features["vi-insts"] = true;
153 Features["16-bit-insts"] = true;
154 Features["dpp"] = true;
155 Features["s-memrealtime"] = true;
156 LLVM_FALLTHROUGH;
157 case GK_GFX704:
158 case GK_GFX703:
159 case GK_GFX702:
160 case GK_GFX701:
161 case GK_GFX700:
162 Features["ci-insts"] = true;
163 LLVM_FALLTHROUGH;
164 case GK_GFX601:
165 case GK_GFX600:
166 break;
167 case GK_NONE:
168 return false;
169 default:
170 llvm_unreachable("Unhandled GPU!");
171 }
172 } else {
173 if (CPU.empty())
174 CPU = "r600";
175
176 switch (llvm::AMDGPU::parseArchR600(CPU)) {
177 case GK_CAYMAN:
178 case GK_CYPRESS:
179 case GK_RV770:
180 case GK_RV670:
181 // TODO: Add fp64 when implemented.
182 break;
183 case GK_TURKS:
184 case GK_CAICOS:
185 case GK_BARTS:
186 case GK_SUMO:
187 case GK_REDWOOD:
188 case GK_JUNIPER:
189 case GK_CEDAR:
190 case GK_RV730:
191 case GK_RV710:
192 case GK_RS880:
193 case GK_R630:
194 case GK_R600:
195 break;
196 default:
197 llvm_unreachable("Unhandled GPU!");
198 }
199 }
200
201 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
202 }
203
adjustTargetOptions(const CodeGenOptions & CGOpts,TargetOptions & TargetOpts) const204 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
205 TargetOptions &TargetOpts) const {
206 bool hasFP32Denormals = false;
207 bool hasFP64Denormals = false;
208
209 for (auto &I : TargetOpts.FeaturesAsWritten) {
210 if (I == "+fp32-denormals" || I == "-fp32-denormals")
211 hasFP32Denormals = true;
212 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
213 hasFP64Denormals = true;
214 }
215 if (!hasFP32Denormals)
216 TargetOpts.Features.push_back(
217 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
218 ? '+' : '-') + Twine("fp32-denormals"))
219 .str());
220 // Always do not flush fp64 or fp16 denorms.
221 if (!hasFP64Denormals && hasFP64())
222 TargetOpts.Features.push_back("+fp64-fp16-denormals");
223 }
224
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const225 void AMDGPUTargetInfo::fillValidCPUList(
226 SmallVectorImpl<StringRef> &Values) const {
227 if (isAMDGCN(getTriple()))
228 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
229 else
230 llvm::AMDGPU::fillValidArchListR600(Values);
231 }
232
setAddressSpaceMap(bool DefaultIsPrivate)233 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
234 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
235 }
236
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)237 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
238 const TargetOptions &Opts)
239 : TargetInfo(Triple),
240 GPUKind(isAMDGCN(Triple) ?
241 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
242 llvm::AMDGPU::parseArchR600(Opts.CPU)),
243 GPUFeatures(isAMDGCN(Triple) ?
244 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
245 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
246 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
247 : DataLayoutStringR600);
248 assert(DataLayout->getAllocaAddrSpace() == Private);
249
250 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
251 !isAMDGCN(Triple));
252 UseAddrSpaceMapMangling = true;
253
254 // Set pointer width and alignment for target address space 0.
255 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
256 if (getMaxPointerWidth() == 64) {
257 LongWidth = LongAlign = 64;
258 SizeType = UnsignedLong;
259 PtrDiffType = SignedLong;
260 IntPtrType = SignedLong;
261 }
262
263 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
264 }
265
adjust(LangOptions & Opts)266 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
267 TargetInfo::adjust(Opts);
268 // ToDo: There are still a few places using default address space as private
269 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
270 // can be removed from the following line.
271 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
272 !isAMDGCN(getTriple()));
273 }
274
getTargetBuiltins() const275 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
276 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
277 Builtin::FirstTSBuiltin);
278 }
279
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const280 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
281 MacroBuilder &Builder) const {
282 Builder.defineMacro("__AMD__");
283 Builder.defineMacro("__AMDGPU__");
284
285 if (isAMDGCN(getTriple()))
286 Builder.defineMacro("__AMDGCN__");
287 else
288 Builder.defineMacro("__R600__");
289
290 if (GPUKind != llvm::AMDGPU::GK_NONE) {
291 StringRef CanonName = isAMDGCN(getTriple()) ?
292 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
293 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
294 }
295
296 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
297 // removed in the near future.
298 if (hasFMAF())
299 Builder.defineMacro("__HAS_FMAF__");
300 if (hasFastFMAF())
301 Builder.defineMacro("FP_FAST_FMAF");
302 if (hasLDEXPF())
303 Builder.defineMacro("__HAS_LDEXPF__");
304 if (hasFP64())
305 Builder.defineMacro("__HAS_FP64__");
306 if (hasFastFMA())
307 Builder.defineMacro("FP_FAST_FMA");
308 }
309