1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // opencl_global_device 49 Global, // opencl_global_host 50 Global, // cuda_device 51 Constant, // cuda_constant 52 Local, // cuda_shared 53 Global, // sycl_global 54 Global, // sycl_global_device 55 Global, // sycl_global_host 56 Local, // sycl_local 57 Private, // sycl_private 58 Generic, // ptr32_sptr 59 Generic, // ptr32_uptr 60 Generic // ptr64 61 }; 62 63 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 64 Private, // Default 65 Global, // opencl_global 66 Local, // opencl_local 67 Constant, // opencl_constant 68 Private, // opencl_private 69 Generic, // opencl_generic 70 Global, // opencl_global_device 71 Global, // opencl_global_host 72 Global, // cuda_device 73 Constant, // cuda_constant 74 Local, // cuda_shared 75 // SYCL address space values for this map are dummy 76 Generic, // sycl_global 77 Generic, // sycl_global_device 78 Generic, // sycl_global_host 79 Generic, // sycl_local 80 Generic, // sycl_private 81 Generic, // ptr32_sptr 82 Generic, // ptr32_uptr 83 Generic // ptr64 84 85 }; 86 } // namespace targets 87 } // namespace clang 88 89 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 90 #define BUILTIN(ID, TYPE, ATTRS) \ 91 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 93 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 94 #include "clang/Basic/BuiltinsAMDGPU.def" 95 }; 96 97 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 98 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 99 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 100 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 101 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 102 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 103 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 104 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 105 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 106 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 107 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 108 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 109 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 110 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 111 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 112 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 113 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 114 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 115 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 116 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 117 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 118 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 119 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 120 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 121 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 122 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 123 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 124 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 125 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 126 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 127 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 128 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 129 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 130 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 131 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 132 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 133 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 134 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 135 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 136 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 137 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 138 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 139 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 140 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 141 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 142 "flat_scratch_lo", "flat_scratch_hi", 143 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 144 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 145 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 146 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 147 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 148 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 149 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 150 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 151 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 152 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 153 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 154 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 155 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 156 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 157 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 158 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 159 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 160 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 161 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 162 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 163 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 164 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 165 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 166 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 167 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 168 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 169 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 170 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 171 "a252", "a253", "a254", "a255" 172 }; 173 174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 175 return llvm::makeArrayRef(GCCRegNames); 176 } 177 178 bool AMDGPUTargetInfo::initFeatureMap( 179 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 180 const std::vector<std::string> &FeatureVec) const { 181 182 using namespace llvm::AMDGPU; 183 184 // XXX - What does the member GPU mean if device name string passed here? 185 if (isAMDGCN(getTriple())) { 186 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 187 case GK_GFX1034: 188 case GK_GFX1033: 189 case GK_GFX1032: 190 case GK_GFX1031: 191 case GK_GFX1030: 192 Features["ci-insts"] = true; 193 Features["dot1-insts"] = true; 194 Features["dot2-insts"] = true; 195 Features["dot5-insts"] = true; 196 Features["dot6-insts"] = true; 197 Features["dot7-insts"] = true; 198 Features["dl-insts"] = true; 199 Features["flat-address-space"] = true; 200 Features["16-bit-insts"] = true; 201 Features["dpp"] = true; 202 Features["gfx8-insts"] = true; 203 Features["gfx9-insts"] = true; 204 Features["gfx10-insts"] = true; 205 Features["gfx10-3-insts"] = true; 206 Features["s-memrealtime"] = true; 207 Features["s-memtime-inst"] = true; 208 break; 209 case GK_GFX1012: 210 case GK_GFX1011: 211 Features["dot1-insts"] = true; 212 Features["dot2-insts"] = true; 213 Features["dot5-insts"] = true; 214 Features["dot6-insts"] = true; 215 Features["dot7-insts"] = true; 216 LLVM_FALLTHROUGH; 217 case GK_GFX1013: 218 case GK_GFX1010: 219 Features["dl-insts"] = true; 220 Features["ci-insts"] = true; 221 Features["flat-address-space"] = true; 222 Features["16-bit-insts"] = true; 223 Features["dpp"] = true; 224 Features["gfx8-insts"] = true; 225 Features["gfx9-insts"] = true; 226 Features["gfx10-insts"] = true; 227 Features["s-memrealtime"] = true; 228 Features["s-memtime-inst"] = true; 229 break; 230 case GK_GFX90A: 231 Features["gfx90a-insts"] = true; 232 LLVM_FALLTHROUGH; 233 case GK_GFX908: 234 Features["dot3-insts"] = true; 235 Features["dot4-insts"] = true; 236 Features["dot5-insts"] = true; 237 Features["dot6-insts"] = true; 238 Features["mai-insts"] = true; 239 LLVM_FALLTHROUGH; 240 case GK_GFX906: 241 Features["dl-insts"] = true; 242 Features["dot1-insts"] = true; 243 Features["dot2-insts"] = true; 244 Features["dot7-insts"] = true; 245 LLVM_FALLTHROUGH; 246 case GK_GFX90C: 247 case GK_GFX909: 248 case GK_GFX904: 249 case GK_GFX902: 250 case GK_GFX900: 251 Features["gfx9-insts"] = true; 252 LLVM_FALLTHROUGH; 253 case GK_GFX810: 254 case GK_GFX805: 255 case GK_GFX803: 256 case GK_GFX802: 257 case GK_GFX801: 258 Features["gfx8-insts"] = true; 259 Features["16-bit-insts"] = true; 260 Features["dpp"] = true; 261 Features["s-memrealtime"] = true; 262 LLVM_FALLTHROUGH; 263 case GK_GFX705: 264 case GK_GFX704: 265 case GK_GFX703: 266 case GK_GFX702: 267 case GK_GFX701: 268 case GK_GFX700: 269 Features["ci-insts"] = true; 270 Features["flat-address-space"] = true; 271 LLVM_FALLTHROUGH; 272 case GK_GFX602: 273 case GK_GFX601: 274 case GK_GFX600: 275 Features["s-memtime-inst"] = true; 276 break; 277 case GK_NONE: 278 break; 279 default: 280 llvm_unreachable("Unhandled GPU!"); 281 } 282 } else { 283 if (CPU.empty()) 284 CPU = "r600"; 285 286 switch (llvm::AMDGPU::parseArchR600(CPU)) { 287 case GK_CAYMAN: 288 case GK_CYPRESS: 289 case GK_RV770: 290 case GK_RV670: 291 // TODO: Add fp64 when implemented. 292 break; 293 case GK_TURKS: 294 case GK_CAICOS: 295 case GK_BARTS: 296 case GK_SUMO: 297 case GK_REDWOOD: 298 case GK_JUNIPER: 299 case GK_CEDAR: 300 case GK_RV730: 301 case GK_RV710: 302 case GK_RS880: 303 case GK_R630: 304 case GK_R600: 305 break; 306 default: 307 llvm_unreachable("Unhandled GPU!"); 308 } 309 } 310 311 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 312 } 313 314 void AMDGPUTargetInfo::fillValidCPUList( 315 SmallVectorImpl<StringRef> &Values) const { 316 if (isAMDGCN(getTriple())) 317 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 318 else 319 llvm::AMDGPU::fillValidArchListR600(Values); 320 } 321 322 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 323 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 324 } 325 326 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 327 const TargetOptions &Opts) 328 : TargetInfo(Triple), 329 GPUKind(isAMDGCN(Triple) ? 330 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 331 llvm::AMDGPU::parseArchR600(Opts.CPU)), 332 GPUFeatures(isAMDGCN(Triple) ? 333 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 334 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 335 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 336 : DataLayoutStringR600); 337 GridValues = llvm::omp::AMDGPUGpuGridValues; 338 339 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 340 !isAMDGCN(Triple)); 341 UseAddrSpaceMapMangling = true; 342 343 HasLegalHalfType = true; 344 HasFloat16 = true; 345 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 346 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 347 348 // Set pointer width and alignment for target address space 0. 349 PointerWidth = PointerAlign = getPointerWidthV(Generic); 350 if (getMaxPointerWidth() == 64) { 351 LongWidth = LongAlign = 64; 352 SizeType = UnsignedLong; 353 PtrDiffType = SignedLong; 354 IntPtrType = SignedLong; 355 } 356 357 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 358 } 359 360 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 361 TargetInfo::adjust(Opts); 362 // ToDo: There are still a few places using default address space as private 363 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 364 // can be removed from the following line. 365 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 366 !isAMDGCN(getTriple())); 367 } 368 369 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 370 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 371 Builtin::FirstTSBuiltin); 372 } 373 374 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 375 MacroBuilder &Builder) const { 376 Builder.defineMacro("__AMD__"); 377 Builder.defineMacro("__AMDGPU__"); 378 379 if (isAMDGCN(getTriple())) 380 Builder.defineMacro("__AMDGCN__"); 381 else 382 Builder.defineMacro("__R600__"); 383 384 if (GPUKind != llvm::AMDGPU::GK_NONE) { 385 StringRef CanonName = isAMDGCN(getTriple()) ? 386 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 387 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 388 if (isAMDGCN(getTriple())) { 389 Builder.defineMacro("__amdgcn_processor__", 390 Twine("\"") + Twine(CanonName) + Twine("\"")); 391 Builder.defineMacro("__amdgcn_target_id__", 392 Twine("\"") + Twine(getTargetID().getValue()) + 393 Twine("\"")); 394 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 395 auto Loc = OffloadArchFeatures.find(F); 396 if (Loc != OffloadArchFeatures.end()) { 397 std::string NewF = F.str(); 398 std::replace(NewF.begin(), NewF.end(), '-', '_'); 399 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 400 Twine("__"), 401 Loc->second ? "1" : "0"); 402 } 403 } 404 } 405 } 406 407 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 408 // removed in the near future. 409 if (hasFMAF()) 410 Builder.defineMacro("__HAS_FMAF__"); 411 if (hasFastFMAF()) 412 Builder.defineMacro("FP_FAST_FMAF"); 413 if (hasLDEXPF()) 414 Builder.defineMacro("__HAS_LDEXPF__"); 415 if (hasFP64()) 416 Builder.defineMacro("__HAS_FP64__"); 417 if (hasFastFMA()) 418 Builder.defineMacro("FP_FAST_FMA"); 419 420 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 421 } 422 423 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 424 assert(HalfFormat == Aux->HalfFormat); 425 assert(FloatFormat == Aux->FloatFormat); 426 assert(DoubleFormat == Aux->DoubleFormat); 427 428 // On x86_64 long double is 80-bit extended precision format, which is 429 // not supported by AMDGPU. 128-bit floating point format is also not 430 // supported by AMDGPU. Therefore keep its own format for these two types. 431 auto SaveLongDoubleFormat = LongDoubleFormat; 432 auto SaveFloat128Format = Float128Format; 433 copyAuxTarget(Aux); 434 LongDoubleFormat = SaveLongDoubleFormat; 435 Float128Format = SaveFloat128Format; 436 // For certain builtin types support on the host target, claim they are 437 // support to pass the compilation of the host code during the device-side 438 // compilation. 439 // FIXME: As the side effect, we also accept `__float128` uses in the device 440 // code. To rejct these builtin types supported in the host target but not in 441 // the device target, one approach would support `device_builtin` attribute 442 // so that we could tell the device builtin types from the host ones. The 443 // also solves the different representations of the same builtin type, such 444 // as `size_t` in the MSVC environment. 445 if (Aux->hasFloat128Type()) { 446 HasFloat128 = true; 447 Float128Format = DoubleFormat; 448 } 449 } 450