//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/DataLayout.h"

using namespace clang;
using namespace clang::targets;

namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout description handed to resetDataLayout() by the constructor when
// the triple is not AMDGCN.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Data layout description handed to resetDataLayout() by the constructor when
// the triple is AMDGCN.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
    "-ni:7";

// Language-to-target address space map in which the Default language address
// space maps to Generic. Installed by setAddressSpaceMap(false). The entries
// are positional; the trailing comment on each row names the language address
// space that row stands for.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};

// Same table as AMDGPUDefIsGenMap except that the Default language address
// space maps to Private. Installed by setAddressSpaceMap(true).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
} // namespace targets
} // namespace clang

// Builtin table generated by expanding BuiltinsAMDGPU.def. BUILTIN rows leave
// the last field null; TARGET_BUILTIN rows store their FEATURE string there.
// getTargetBuiltins() exposes this table.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};

// Register names returned by getGCCRegNames(): v0-v255, s0-s127, the special
// registers listed after s127 (exec, vcc, scc, m0, flat_scratch and their
// _lo/_hi halves), and finally a0-a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};

// Returns a view over the whole GCCRegNames table.
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  return llvm::makeArrayRef(GCCRegNames);
}

// Seeds Features with the defaults for CPU and then defers to
// TargetInfo::initFeatureMap() with the same Diags and FeatureVec.
//
// For AMDGCN triples the switch cases are deliberately ordered so that a
// newer processor falls through into the cases of older generations and
// accumulates their features; the GFX10 cases do not fall through and list
// their full feature sets explicitly. Reordering the cases changes the
// resulting feature sets.
//
// For other (R600) triples an empty CPU is replaced by "r600" and the switch
// enables no features; it only checks that the parsed kind is a known one.
//
// Returns whatever TargetInfo::initFeatureMap() returns.
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  using namespace llvm::AMDGPU;

  // XXX - What does the member GPU mean if device name string passed here?
  if (isAMDGCN(getTriple())) {
    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    case GK_GFX1030:
      Features["ci-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dl-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["s-memrealtime"] = true;
      break;
    case GK_GFX1012:
    case GK_GFX1011:
      // The dot instructions are added on top of the GFX1010 set below.
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX1010:
      Features["dl-insts"] = true;
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["s-memrealtime"] = true;
      break;
    case GK_GFX908:
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["mai-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX906:
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX601:
    case GK_GFX600:
      break;
    case GK_NONE:
      // No processor recognized: leave Features without CPU defaults.
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    if (CPU.empty())
      CPU = "r600";

    switch (llvm::AMDGPU::parseArchR600(CPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      // Note: unlike the AMDGCN switch above, GK_NONE is not listed here, so
      // an unparsable CPU name reaches this point.
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}

// Appends the valid processor names to Values, using the AMDGCN list for
// AMDGCN triples and the R600 list otherwise.
void AMDGPUTargetInfo::fillValidCPUList(
    SmallVectorImpl<StringRef> &Values) const {
  if (isAMDGCN(getTriple()))
    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  else
    llvm::AMDGPU::fillValidArchListR600(Values);
}

// Points AddrSpaceMap at AMDGPUDefIsPrivMap when DefaultIsPrivate is true and
// at AMDGPUDefIsGenMap otherwise.
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
}

// Builds the target description for Triple.
//
// GPUKind is parsed from Opts.CPU with the parser matching the triple
// (AMDGCN or R600), and GPUFeatures holds the arch attributes for that kind.
AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      GPUKind(isAMDGCN(Triple) ?
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
              llvm::AMDGPU::parseArchR600(Opts.CPU)),
      GPUFeatures(isAMDGCN(Triple) ?
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);
  // Both layout strings are expected to place allocas in the Private address
  // space.
  assert(DataLayout->getAllocaAddrSpace() == Private);
  GridValues = llvm::omp::AMDGPUGpuGridValues;

  // The Default address space maps to Private for Mesa3D and for non-AMDGCN
  // triples, and to Generic otherwise. adjust() may later replace this map.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  HasLegalHalfType = true;
  HasFloat16 = true;

  // Set pointer width and alignment for target address space 0.
  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
  // With 64-bit pointers, `long` becomes 64 bits wide and is used for the
  // size, pointer-difference and intptr types.
  if (getMaxPointerWidth() == 64) {
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}

// Applies the generic TargetInfo adjustments for Opts and then re-selects the
// address space map: Default maps to Private for OpenCL or for non-AMDGCN
// triples, and to Generic otherwise.
void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
  TargetInfo::adjust(Opts);
  // ToDo: There are still a few places using default address space as private
  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  // can be removed from the following line.
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
                     !isAMDGCN(getTriple()));
}

// Returns a view over BuiltinInfo whose length is the number of AMDGPU
// target-specific builtin IDs (LastTSBuiltin - FirstTSBuiltin).
ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
                                             Builtin::FirstTSBuiltin);
}

// Adds the AMDGPU predefined macros to Builder: __AMD__ and __AMDGPU__
// always, __AMDGCN__ or __R600__ depending on the triple, a
// __<canonical-arch-name>__ macro when a GPU kind is known, and the
// capability macros below when the corresponding has*() predicate holds.
// Opts is not consulted here.
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
  Builder.defineMacro("__AMD__");
  Builder.defineMacro("__AMDGPU__");

  if (isAMDGCN(getTriple()))
    Builder.defineMacro("__AMDGCN__");
  else
    Builder.defineMacro("__R600__");

  if (GPUKind != llvm::AMDGPU::GK_NONE) {
    StringRef CanonName = isAMDGCN(getTriple()) ?
      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
  }

  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  // removed in the near future.
  if (hasFMAF())
    Builder.defineMacro("__HAS_FMAF__");
  if (hasFastFMAF())
    Builder.defineMacro("FP_FAST_FMAF");
  if (hasLDEXPF())
    Builder.defineMacro("__HAS_LDEXPF__");
  if (hasFP64())
    Builder.defineMacro("__HAS_FP64__");
  if (hasFastFMA())
    Builder.defineMacro("FP_FAST_FMA");
}

// Copies the auxiliary target's properties into this target via
// copyAuxTarget(), while keeping this target's own long double and float128
// formats. Aux must be non-null, and its half, float and double formats are
// asserted to match this target's.
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
  assert(HalfFormat == Aux->HalfFormat);
  assert(FloatFormat == Aux->FloatFormat);
  assert(DoubleFormat == Aux->DoubleFormat);

  // On x86_64 long double is 80-bit extended precision format, which is
  // not supported by AMDGPU. 128-bit floating point format is also not
  // supported by AMDGPU. Therefore keep its own format for these two types.
  auto SaveLongDoubleFormat = LongDoubleFormat;
  auto SaveFloat128Format = Float128Format;
  copyAuxTarget(Aux);
  LongDoubleFormat = SaveLongDoubleFormat;
  Float128Format = SaveFloat128Format;
  // For certain builtin types supported on the host target, claim they are
  // supported so that the host code compiles during the device-side
  // compilation.
  // FIXME: As a side effect, we also accept `__float128` uses in the device
  // code. To reject these builtin types supported in the host target but not
  // in the device target, one approach would be to support a `device_builtin`
  // attribute so that we could tell the device builtin types from the host
  // ones. That also solves the different representations of the same builtin
  // type, such as `size_t` in the MSVC environment.
  if (Aux->hasFloat128Type()) {
    HasFloat128 = true;
    // Float128Format is set to the double format here rather than to the
    // host's 128-bit format.
    Float128Format = DoubleFormat;
  }
}