1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // opencl_global_device 48 Global, // opencl_global_host 49 Global, // cuda_device 50 Constant, // cuda_constant 51 Local, // cuda_shared 52 Global, // sycl_global 53 Global, // sycl_global_device 54 Global, // sycl_global_host 55 Local, // sycl_local 56 Private, // sycl_private 57 Generic, // ptr32_sptr 58 Generic, // ptr32_uptr 59 Generic // ptr64 60 }; 61 62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 63 Private, // Default 64 Global, // opencl_global 65 Local, // opencl_local 66 Constant, // opencl_constant 67 Private, // opencl_private 68 Generic, // opencl_generic 69 Global, // opencl_global_device 70 Global, // opencl_global_host 71 Global, // cuda_device 72 Constant, // cuda_constant 73 Local, // cuda_shared 74 // SYCL address space values for this map are dummy 75 Generic, // sycl_global 76 Generic, // sycl_global_device 77 Generic, // sycl_global_host 78 Generic, // sycl_local 79 Generic, // sycl_private 80 Generic, // ptr32_sptr 81 Generic, // ptr32_uptr 82 Generic // ptr64 83 84 }; 85 } // namespace targets 86 } // namespace clang 87 88 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 89 #define BUILTIN(ID, TYPE, ATTRS) \ 90 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 91 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 92 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 93 #include "clang/Basic/BuiltinsAMDGPU.def" 94 }; 95 96 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 97 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 98 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 99 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 100 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 101 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 102 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 103 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 104 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 105 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 106 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 107 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 108 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 109 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 110 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 111 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 112 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 113 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 114 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 115 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 116 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 117 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 118 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 119 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 120 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 121 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 122 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 123 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 124 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 125 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 126 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 127 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 128 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 129 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 130 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 131 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 132 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 133 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 134 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 135 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 136 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 137 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 138 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 139 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 140 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 141 "flat_scratch_lo", "flat_scratch_hi", 142 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 143 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 144 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 145 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 146 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 147 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 148 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 149 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 150 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 151 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 152 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 153 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 154 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 155 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 156 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 157 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 158 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 159 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 160 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 161 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 162 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 163 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 164 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 165 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 166 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 167 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 168 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 169 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 170 "a252", "a253", "a254", "a255" 171 }; 172 173 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 174 return llvm::makeArrayRef(GCCRegNames); 175 } 176 177 bool AMDGPUTargetInfo::initFeatureMap( 178 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 179 const std::vector<std::string> &FeatureVec) const { 180 181 using namespace llvm::AMDGPU; 182 183 // XXX - What does the member GPU mean if device name string passed here? 184 if (isAMDGCN(getTriple())) { 185 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 186 case GK_GFX1035: 187 case GK_GFX1034: 188 case GK_GFX1033: 189 case GK_GFX1032: 190 case GK_GFX1031: 191 case GK_GFX1030: 192 Features["ci-insts"] = true; 193 Features["dot1-insts"] = true; 194 Features["dot2-insts"] = true; 195 Features["dot5-insts"] = true; 196 Features["dot6-insts"] = true; 197 Features["dot7-insts"] = true; 198 Features["dl-insts"] = true; 199 Features["flat-address-space"] = true; 200 Features["16-bit-insts"] = true; 201 Features["dpp"] = true; 202 Features["gfx8-insts"] = true; 203 Features["gfx9-insts"] = true; 204 Features["gfx10-insts"] = true; 205 Features["gfx10-3-insts"] = true; 206 Features["s-memrealtime"] = true; 207 Features["s-memtime-inst"] = true; 208 break; 209 case GK_GFX1012: 210 case GK_GFX1011: 211 Features["dot1-insts"] = true; 212 Features["dot2-insts"] = true; 213 Features["dot5-insts"] = true; 214 Features["dot6-insts"] = true; 215 Features["dot7-insts"] = true; 216 LLVM_FALLTHROUGH; 217 case GK_GFX1013: 218 case GK_GFX1010: 219 Features["dl-insts"] = true; 220 Features["ci-insts"] = true; 221 Features["flat-address-space"] = true; 222 Features["16-bit-insts"] = true; 223 Features["dpp"] = true; 224 Features["gfx8-insts"] = true; 225 Features["gfx9-insts"] = true; 226 Features["gfx10-insts"] = true; 227 Features["s-memrealtime"] = true; 228 Features["s-memtime-inst"] = true; 229 break; 230 case GK_GFX940: 231 Features["gfx940-insts"] = true; 232 LLVM_FALLTHROUGH; 233 case GK_GFX90A: 234 Features["gfx90a-insts"] = true; 235 LLVM_FALLTHROUGH; 236 case GK_GFX908: 237 Features["dot3-insts"] = true; 238 Features["dot4-insts"] = true; 239 Features["dot5-insts"] = true; 240 Features["dot6-insts"] = true; 241 Features["mai-insts"] = true; 242 LLVM_FALLTHROUGH; 243 case GK_GFX906: 244 Features["dl-insts"] = true; 245 Features["dot1-insts"] = true; 246 Features["dot2-insts"] = true; 247 Features["dot7-insts"] = true; 248 LLVM_FALLTHROUGH; 249 case GK_GFX90C: 250 case GK_GFX909: 251 case GK_GFX904: 252 case GK_GFX902: 253 case GK_GFX900: 254 Features["gfx9-insts"] = true; 255 LLVM_FALLTHROUGH; 256 case GK_GFX810: 257 case GK_GFX805: 258 case GK_GFX803: 259 case GK_GFX802: 260 case GK_GFX801: 261 Features["gfx8-insts"] = true; 262 Features["16-bit-insts"] = true; 263 Features["dpp"] = true; 264 Features["s-memrealtime"] = true; 265 LLVM_FALLTHROUGH; 266 case GK_GFX705: 267 case GK_GFX704: 268 case GK_GFX703: 269 case GK_GFX702: 270 case GK_GFX701: 271 case GK_GFX700: 272 Features["ci-insts"] = true; 273 Features["flat-address-space"] = true; 274 LLVM_FALLTHROUGH; 275 case GK_GFX602: 276 case GK_GFX601: 277 case GK_GFX600: 278 Features["s-memtime-inst"] = true; 279 break; 280 case GK_NONE: 281 break; 282 default: 283 llvm_unreachable("Unhandled GPU!"); 284 } 285 } else { 286 if (CPU.empty()) 287 CPU = "r600"; 288 289 switch (llvm::AMDGPU::parseArchR600(CPU)) { 290 case GK_CAYMAN: 291 case GK_CYPRESS: 292 case GK_RV770: 293 case GK_RV670: 294 // TODO: Add fp64 when implemented. 295 break; 296 case GK_TURKS: 297 case GK_CAICOS: 298 case GK_BARTS: 299 case GK_SUMO: 300 case GK_REDWOOD: 301 case GK_JUNIPER: 302 case GK_CEDAR: 303 case GK_RV730: 304 case GK_RV710: 305 case GK_RS880: 306 case GK_R630: 307 case GK_R600: 308 break; 309 default: 310 llvm_unreachable("Unhandled GPU!"); 311 } 312 } 313 314 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 315 } 316 317 void AMDGPUTargetInfo::fillValidCPUList( 318 SmallVectorImpl<StringRef> &Values) const { 319 if (isAMDGCN(getTriple())) 320 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 321 else 322 llvm::AMDGPU::fillValidArchListR600(Values); 323 } 324 325 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 326 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 327 } 328 329 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 330 const TargetOptions &Opts) 331 : TargetInfo(Triple), 332 GPUKind(isAMDGCN(Triple) ? 333 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 334 llvm::AMDGPU::parseArchR600(Opts.CPU)), 335 GPUFeatures(isAMDGCN(Triple) ? 336 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 337 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 338 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 339 : DataLayoutStringR600); 340 341 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 342 !isAMDGCN(Triple)); 343 UseAddrSpaceMapMangling = true; 344 345 HasLegalHalfType = true; 346 HasFloat16 = true; 347 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 348 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 349 350 // Set pointer width and alignment for target address space 0. 351 PointerWidth = PointerAlign = getPointerWidthV(Generic); 352 if (getMaxPointerWidth() == 64) { 353 LongWidth = LongAlign = 64; 354 SizeType = UnsignedLong; 355 PtrDiffType = SignedLong; 356 IntPtrType = SignedLong; 357 } 358 359 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 360 } 361 362 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 363 TargetInfo::adjust(Diags, Opts); 364 // ToDo: There are still a few places using default address space as private 365 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 366 // can be removed from the following line. 367 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 368 !isAMDGCN(getTriple())); 369 } 370 371 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 372 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 373 Builtin::FirstTSBuiltin); 374 } 375 376 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 377 MacroBuilder &Builder) const { 378 Builder.defineMacro("__AMD__"); 379 Builder.defineMacro("__AMDGPU__"); 380 381 if (isAMDGCN(getTriple())) 382 Builder.defineMacro("__AMDGCN__"); 383 else 384 Builder.defineMacro("__R600__"); 385 386 if (GPUKind != llvm::AMDGPU::GK_NONE) { 387 StringRef CanonName = isAMDGCN(getTriple()) ? 388 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 389 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 390 if (isAMDGCN(getTriple())) { 391 Builder.defineMacro("__amdgcn_processor__", 392 Twine("\"") + Twine(CanonName) + Twine("\"")); 393 Builder.defineMacro("__amdgcn_target_id__", 394 Twine("\"") + Twine(getTargetID().getValue()) + 395 Twine("\"")); 396 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 397 auto Loc = OffloadArchFeatures.find(F); 398 if (Loc != OffloadArchFeatures.end()) { 399 std::string NewF = F.str(); 400 std::replace(NewF.begin(), NewF.end(), '-', '_'); 401 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 402 Twine("__"), 403 Loc->second ? "1" : "0"); 404 } 405 } 406 } 407 } 408 409 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 410 // removed in the near future. 411 if (hasFMAF()) 412 Builder.defineMacro("__HAS_FMAF__"); 413 if (hasFastFMAF()) 414 Builder.defineMacro("FP_FAST_FMAF"); 415 if (hasLDEXPF()) 416 Builder.defineMacro("__HAS_LDEXPF__"); 417 if (hasFP64()) 418 Builder.defineMacro("__HAS_FP64__"); 419 if (hasFastFMA()) 420 Builder.defineMacro("FP_FAST_FMA"); 421 422 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 423 } 424 425 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 426 assert(HalfFormat == Aux->HalfFormat); 427 assert(FloatFormat == Aux->FloatFormat); 428 assert(DoubleFormat == Aux->DoubleFormat); 429 430 // On x86_64 long double is 80-bit extended precision format, which is 431 // not supported by AMDGPU. 128-bit floating point format is also not 432 // supported by AMDGPU. Therefore keep its own format for these two types. 433 auto SaveLongDoubleFormat = LongDoubleFormat; 434 auto SaveFloat128Format = Float128Format; 435 copyAuxTarget(Aux); 436 LongDoubleFormat = SaveLongDoubleFormat; 437 Float128Format = SaveFloat128Format; 438 // For certain builtin types support on the host target, claim they are 439 // support to pass the compilation of the host code during the device-side 440 // compilation. 441 // FIXME: As the side effect, we also accept `__float128` uses in the device 442 // code. To rejct these builtin types supported in the host target but not in 443 // the device target, one approach would support `device_builtin` attribute 444 // so that we could tell the device builtin types from the host ones. The 445 // also solves the different representations of the same builtin type, such 446 // as `size_t` in the MSVC environment. 447 if (Aux->hasFloat128Type()) { 448 HasFloat128 = true; 449 Float128Format = DoubleFormat; 450 } 451 } 452