1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // opencl_global_device 48 Global, // opencl_global_host 49 Global, // cuda_device 50 Constant, // cuda_constant 51 Local, // cuda_shared 52 Global, // sycl_global 53 Global, // sycl_global_device 54 Global, // sycl_global_host 55 Local, // sycl_local 56 Private, // sycl_private 57 Generic, // ptr32_sptr 58 Generic, // ptr32_uptr 59 Generic // ptr64 60 }; 61 62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 63 Private, // Default 64 Global, // opencl_global 65 Local, // opencl_local 66 Constant, // opencl_constant 67 Private, // opencl_private 68 Generic, // opencl_generic 69 Global, // opencl_global_device 70 Global, // opencl_global_host 71 Global, // cuda_device 72 Constant, // cuda_constant 73 Local, // cuda_shared 74 // SYCL address space values for this map are dummy 75 Generic, // sycl_global 76 Generic, // sycl_global_device 77 Generic, // sycl_global_host 78 Generic, // sycl_local 79 Generic, // sycl_private 80 Generic, // ptr32_sptr 81 Generic, // ptr32_uptr 82 Generic // ptr64 83 84 }; 85 } // namespace targets 86 } // namespace clang 87 88 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 89 #define BUILTIN(ID, TYPE, ATTRS) \ 90 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 91 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 92 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 93 #include "clang/Basic/BuiltinsAMDGPU.def" 94 }; 95 96 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 97 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 98 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 99 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 100 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 101 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 102 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 103 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 104 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 105 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 106 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 107 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 108 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 109 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 110 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 111 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 112 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 113 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 114 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 115 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 116 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 117 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 118 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 119 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 120 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 121 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 122 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 123 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 124 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 125 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 126 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 127 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 128 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 129 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 130 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 131 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 132 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 133 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 134 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 135 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 136 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 137 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 138 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 139 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 140 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 141 "flat_scratch_lo", "flat_scratch_hi", 142 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 143 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 144 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 145 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 146 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 147 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 148 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 149 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 150 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 151 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 152 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 153 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 154 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 155 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 156 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 157 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 158 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 159 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 160 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 161 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 162 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 163 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 164 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 165 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 166 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 167 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 168 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 169 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 170 "a252", "a253", "a254", "a255" 171 }; 172 173 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 174 return llvm::makeArrayRef(GCCRegNames); 175 } 176 177 bool AMDGPUTargetInfo::initFeatureMap( 178 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 179 const std::vector<std::string> &FeatureVec) const { 180 181 using namespace llvm::AMDGPU; 182 183 // XXX - What does the member GPU mean if device name string passed here? 184 if (isAMDGCN(getTriple())) { 185 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 186 case GK_GFX1036: 187 case GK_GFX1035: 188 case GK_GFX1034: 189 case GK_GFX1033: 190 case GK_GFX1032: 191 case GK_GFX1031: 192 case GK_GFX1030: 193 Features["ci-insts"] = true; 194 Features["dot1-insts"] = true; 195 Features["dot2-insts"] = true; 196 Features["dot5-insts"] = true; 197 Features["dot6-insts"] = true; 198 Features["dot7-insts"] = true; 199 Features["dl-insts"] = true; 200 Features["flat-address-space"] = true; 201 Features["16-bit-insts"] = true; 202 Features["dpp"] = true; 203 Features["gfx8-insts"] = true; 204 Features["gfx9-insts"] = true; 205 Features["gfx10-insts"] = true; 206 Features["gfx10-3-insts"] = true; 207 Features["s-memrealtime"] = true; 208 Features["s-memtime-inst"] = true; 209 break; 210 case GK_GFX1012: 211 case GK_GFX1011: 212 Features["dot1-insts"] = true; 213 Features["dot2-insts"] = true; 214 Features["dot5-insts"] = true; 215 Features["dot6-insts"] = true; 216 Features["dot7-insts"] = true; 217 LLVM_FALLTHROUGH; 218 case GK_GFX1013: 219 case GK_GFX1010: 220 Features["dl-insts"] = true; 221 Features["ci-insts"] = true; 222 Features["flat-address-space"] = true; 223 Features["16-bit-insts"] = true; 224 Features["dpp"] = true; 225 Features["gfx8-insts"] = true; 226 Features["gfx9-insts"] = true; 227 Features["gfx10-insts"] = true; 228 Features["s-memrealtime"] = true; 229 Features["s-memtime-inst"] = true; 230 break; 231 case GK_GFX940: 232 Features["gfx940-insts"] = true; 233 LLVM_FALLTHROUGH; 234 case GK_GFX90A: 235 Features["gfx90a-insts"] = true; 236 LLVM_FALLTHROUGH; 237 case GK_GFX908: 238 Features["dot3-insts"] = true; 239 Features["dot4-insts"] = true; 240 Features["dot5-insts"] = true; 241 Features["dot6-insts"] = true; 242 Features["mai-insts"] = true; 243 LLVM_FALLTHROUGH; 244 case GK_GFX906: 245 Features["dl-insts"] = true; 246 Features["dot1-insts"] = true; 247 Features["dot2-insts"] = true; 248 Features["dot7-insts"] = true; 249 LLVM_FALLTHROUGH; 250 case GK_GFX90C: 251 case GK_GFX909: 252 case GK_GFX904: 253 case GK_GFX902: 254 case GK_GFX900: 255 Features["gfx9-insts"] = true; 256 LLVM_FALLTHROUGH; 257 case GK_GFX810: 258 case GK_GFX805: 259 case GK_GFX803: 260 case GK_GFX802: 261 case GK_GFX801: 262 Features["gfx8-insts"] = true; 263 Features["16-bit-insts"] = true; 264 Features["dpp"] = true; 265 Features["s-memrealtime"] = true; 266 LLVM_FALLTHROUGH; 267 case GK_GFX705: 268 case GK_GFX704: 269 case GK_GFX703: 270 case GK_GFX702: 271 case GK_GFX701: 272 case GK_GFX700: 273 Features["ci-insts"] = true; 274 Features["flat-address-space"] = true; 275 LLVM_FALLTHROUGH; 276 case GK_GFX602: 277 case GK_GFX601: 278 case GK_GFX600: 279 Features["s-memtime-inst"] = true; 280 break; 281 case GK_NONE: 282 break; 283 default: 284 llvm_unreachable("Unhandled GPU!"); 285 } 286 } else { 287 if (CPU.empty()) 288 CPU = "r600"; 289 290 switch (llvm::AMDGPU::parseArchR600(CPU)) { 291 case GK_CAYMAN: 292 case GK_CYPRESS: 293 case GK_RV770: 294 case GK_RV670: 295 // TODO: Add fp64 when implemented. 296 break; 297 case GK_TURKS: 298 case GK_CAICOS: 299 case GK_BARTS: 300 case GK_SUMO: 301 case GK_REDWOOD: 302 case GK_JUNIPER: 303 case GK_CEDAR: 304 case GK_RV730: 305 case GK_RV710: 306 case GK_RS880: 307 case GK_R630: 308 case GK_R600: 309 break; 310 default: 311 llvm_unreachable("Unhandled GPU!"); 312 } 313 } 314 315 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 316 } 317 318 void AMDGPUTargetInfo::fillValidCPUList( 319 SmallVectorImpl<StringRef> &Values) const { 320 if (isAMDGCN(getTriple())) 321 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 322 else 323 llvm::AMDGPU::fillValidArchListR600(Values); 324 } 325 326 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 327 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 328 } 329 330 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 331 const TargetOptions &Opts) 332 : TargetInfo(Triple), 333 GPUKind(isAMDGCN(Triple) ? 334 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 335 llvm::AMDGPU::parseArchR600(Opts.CPU)), 336 GPUFeatures(isAMDGCN(Triple) ? 337 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 338 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 339 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 340 : DataLayoutStringR600); 341 342 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 343 !isAMDGCN(Triple)); 344 UseAddrSpaceMapMangling = true; 345 346 HasLegalHalfType = true; 347 HasFloat16 = true; 348 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 349 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 350 351 // Set pointer width and alignment for target address space 0. 352 PointerWidth = PointerAlign = getPointerWidthV(Generic); 353 if (getMaxPointerWidth() == 64) { 354 LongWidth = LongAlign = 64; 355 SizeType = UnsignedLong; 356 PtrDiffType = SignedLong; 357 IntPtrType = SignedLong; 358 } 359 360 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 361 } 362 363 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 364 TargetInfo::adjust(Diags, Opts); 365 // ToDo: There are still a few places using default address space as private 366 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 367 // can be removed from the following line. 368 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 369 !isAMDGCN(getTriple())); 370 } 371 372 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 373 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 374 Builtin::FirstTSBuiltin); 375 } 376 377 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 378 MacroBuilder &Builder) const { 379 Builder.defineMacro("__AMD__"); 380 Builder.defineMacro("__AMDGPU__"); 381 382 if (isAMDGCN(getTriple())) 383 Builder.defineMacro("__AMDGCN__"); 384 else 385 Builder.defineMacro("__R600__"); 386 387 if (GPUKind != llvm::AMDGPU::GK_NONE) { 388 StringRef CanonName = isAMDGCN(getTriple()) ? 389 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 390 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 391 if (isAMDGCN(getTriple())) { 392 Builder.defineMacro("__amdgcn_processor__", 393 Twine("\"") + Twine(CanonName) + Twine("\"")); 394 Builder.defineMacro("__amdgcn_target_id__", 395 Twine("\"") + Twine(getTargetID().getValue()) + 396 Twine("\"")); 397 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 398 auto Loc = OffloadArchFeatures.find(F); 399 if (Loc != OffloadArchFeatures.end()) { 400 std::string NewF = F.str(); 401 std::replace(NewF.begin(), NewF.end(), '-', '_'); 402 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 403 Twine("__"), 404 Loc->second ? "1" : "0"); 405 } 406 } 407 } 408 } 409 410 if (AllowAMDGPUUnsafeFPAtomics) 411 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__"); 412 413 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 414 // removed in the near future. 415 if (hasFMAF()) 416 Builder.defineMacro("__HAS_FMAF__"); 417 if (hasFastFMAF()) 418 Builder.defineMacro("FP_FAST_FMAF"); 419 if (hasLDEXPF()) 420 Builder.defineMacro("__HAS_LDEXPF__"); 421 if (hasFP64()) 422 Builder.defineMacro("__HAS_FP64__"); 423 if (hasFastFMA()) 424 Builder.defineMacro("FP_FAST_FMA"); 425 426 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 427 } 428 429 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 430 assert(HalfFormat == Aux->HalfFormat); 431 assert(FloatFormat == Aux->FloatFormat); 432 assert(DoubleFormat == Aux->DoubleFormat); 433 434 // On x86_64 long double is 80-bit extended precision format, which is 435 // not supported by AMDGPU. 128-bit floating point format is also not 436 // supported by AMDGPU. Therefore keep its own format for these two types. 437 auto SaveLongDoubleFormat = LongDoubleFormat; 438 auto SaveFloat128Format = Float128Format; 439 copyAuxTarget(Aux); 440 LongDoubleFormat = SaveLongDoubleFormat; 441 Float128Format = SaveFloat128Format; 442 // For certain builtin types support on the host target, claim they are 443 // support to pass the compilation of the host code during the device-side 444 // compilation. 445 // FIXME: As the side effect, we also accept `__float128` uses in the device 446 // code. To rejct these builtin types supported in the host target but not in 447 // the device target, one approach would support `device_builtin` attribute 448 // so that we could tell the device builtin types from the host ones. The 449 // also solves the different representations of the same builtin type, such 450 // as `size_t` in the MSVC environment. 451 if (Aux->hasFloat128Type()) { 452 HasFloat128 = true; 453 Float128Format = DoubleFormat; 454 } 455 } 456