1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // opencl_global_device 49 Global, // opencl_global_host 50 Global, // cuda_device 51 Constant, // cuda_constant 52 Local, // cuda_shared 53 Global, // sycl_global 54 Local, // sycl_local 55 Private, // sycl_private 56 Generic, // ptr32_sptr 57 Generic, // ptr32_uptr 58 Generic // ptr64 59 }; 60 61 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 62 Private, // Default 63 Global, // opencl_global 64 Local, // opencl_local 65 Constant, // opencl_constant 66 Private, // opencl_private 67 Generic, // opencl_generic 68 Global, // opencl_global_device 69 Global, // opencl_global_host 70 Global, // cuda_device 71 Constant, // cuda_constant 72 Local, // cuda_shared 73 // SYCL address space values for this map are dummy 74 Generic, // sycl_global 75 Generic, // sycl_local 76 Generic, // sycl_private 77 Generic, // ptr32_sptr 78 Generic, // ptr32_uptr 79 Generic // ptr64 80 81 }; 82 } // namespace targets 83 } // namespace clang 84 85 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 86 #define BUILTIN(ID, TYPE, ATTRS) \ 87 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 88 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 89 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 90 #include "clang/Basic/BuiltinsAMDGPU.def" 91 }; 92 93 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 94 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 95 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 96 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 97 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 98 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 99 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 100 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 101 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 102 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 103 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 104 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 105 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 106 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 107 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 108 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 109 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 110 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 111 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 112 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 113 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 114 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 115 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 116 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 117 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 118 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 119 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 120 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 121 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 122 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 123 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 124 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 125 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 126 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 127 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 128 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 129 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 130 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 131 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 132 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 133 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 134 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 135 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 136 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 137 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 138 "flat_scratch_lo", "flat_scratch_hi", 139 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 140 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 141 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 142 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 143 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 144 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 145 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 146 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 147 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 148 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 149 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 150 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 151 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 152 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 153 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 154 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 155 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 156 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 157 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 158 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 159 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 160 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 161 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 162 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 163 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 164 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 165 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 166 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 167 "a252", "a253", "a254", "a255" 168 }; 169 170 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 171 return llvm::makeArrayRef(GCCRegNames); 172 } 173 174 bool AMDGPUTargetInfo::initFeatureMap( 175 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 176 const std::vector<std::string> &FeatureVec) const { 177 178 using namespace llvm::AMDGPU; 179 180 // XXX - What does the member GPU mean if device name string passed here? 181 if (isAMDGCN(getTriple())) { 182 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 183 case GK_GFX1033: 184 case GK_GFX1032: 185 case GK_GFX1031: 186 case GK_GFX1030: 187 Features["ci-insts"] = true; 188 Features["dot1-insts"] = true; 189 Features["dot2-insts"] = true; 190 Features["dot5-insts"] = true; 191 Features["dot6-insts"] = true; 192 Features["dot7-insts"] = true; 193 Features["dl-insts"] = true; 194 Features["flat-address-space"] = true; 195 Features["16-bit-insts"] = true; 196 Features["dpp"] = true; 197 Features["gfx8-insts"] = true; 198 Features["gfx9-insts"] = true; 199 Features["gfx10-insts"] = true; 200 Features["gfx10-3-insts"] = true; 201 Features["s-memrealtime"] = true; 202 Features["s-memtime-inst"] = true; 203 break; 204 case GK_GFX1012: 205 case GK_GFX1011: 206 Features["dot1-insts"] = true; 207 Features["dot2-insts"] = true; 208 Features["dot5-insts"] = true; 209 Features["dot6-insts"] = true; 210 Features["dot7-insts"] = true; 211 LLVM_FALLTHROUGH; 212 case GK_GFX1010: 213 Features["dl-insts"] = true; 214 Features["ci-insts"] = true; 215 Features["flat-address-space"] = true; 216 Features["16-bit-insts"] = true; 217 Features["dpp"] = true; 218 Features["gfx8-insts"] = true; 219 Features["gfx9-insts"] = true; 220 Features["gfx10-insts"] = true; 221 Features["s-memrealtime"] = true; 222 Features["s-memtime-inst"] = true; 223 break; 224 case GK_GFX90A: 225 Features["gfx90a-insts"] = true; 226 LLVM_FALLTHROUGH; 227 case GK_GFX908: 228 Features["dot3-insts"] = true; 229 Features["dot4-insts"] = true; 230 Features["dot5-insts"] = true; 231 Features["dot6-insts"] = true; 232 Features["mai-insts"] = true; 233 LLVM_FALLTHROUGH; 234 case GK_GFX906: 235 Features["dl-insts"] = true; 236 Features["dot1-insts"] = true; 237 Features["dot2-insts"] = true; 238 Features["dot7-insts"] = true; 239 LLVM_FALLTHROUGH; 240 case GK_GFX90C: 241 case GK_GFX909: 242 case GK_GFX904: 243 case GK_GFX902: 244 case GK_GFX900: 245 Features["gfx9-insts"] = true; 246 LLVM_FALLTHROUGH; 247 case GK_GFX810: 248 case GK_GFX805: 249 case GK_GFX803: 250 case GK_GFX802: 251 case GK_GFX801: 252 Features["gfx8-insts"] = true; 253 Features["16-bit-insts"] = true; 254 Features["dpp"] = true; 255 Features["s-memrealtime"] = true; 256 LLVM_FALLTHROUGH; 257 case GK_GFX705: 258 case GK_GFX704: 259 case GK_GFX703: 260 case GK_GFX702: 261 case GK_GFX701: 262 case GK_GFX700: 263 Features["ci-insts"] = true; 264 Features["flat-address-space"] = true; 265 LLVM_FALLTHROUGH; 266 case GK_GFX602: 267 case GK_GFX601: 268 case GK_GFX600: 269 Features["s-memtime-inst"] = true; 270 break; 271 case GK_NONE: 272 break; 273 default: 274 llvm_unreachable("Unhandled GPU!"); 275 } 276 } else { 277 if (CPU.empty()) 278 CPU = "r600"; 279 280 switch (llvm::AMDGPU::parseArchR600(CPU)) { 281 case GK_CAYMAN: 282 case GK_CYPRESS: 283 case GK_RV770: 284 case GK_RV670: 285 // TODO: Add fp64 when implemented. 286 break; 287 case GK_TURKS: 288 case GK_CAICOS: 289 case GK_BARTS: 290 case GK_SUMO: 291 case GK_REDWOOD: 292 case GK_JUNIPER: 293 case GK_CEDAR: 294 case GK_RV730: 295 case GK_RV710: 296 case GK_RS880: 297 case GK_R630: 298 case GK_R600: 299 break; 300 default: 301 llvm_unreachable("Unhandled GPU!"); 302 } 303 } 304 305 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 306 } 307 308 void AMDGPUTargetInfo::fillValidCPUList( 309 SmallVectorImpl<StringRef> &Values) const { 310 if (isAMDGCN(getTriple())) 311 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 312 else 313 llvm::AMDGPU::fillValidArchListR600(Values); 314 } 315 316 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 317 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 318 } 319 320 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 321 const TargetOptions &Opts) 322 : TargetInfo(Triple), 323 GPUKind(isAMDGCN(Triple) ? 324 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 325 llvm::AMDGPU::parseArchR600(Opts.CPU)), 326 GPUFeatures(isAMDGCN(Triple) ? 327 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 328 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 329 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 330 : DataLayoutStringR600); 331 GridValues = llvm::omp::AMDGPUGpuGridValues; 332 333 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 334 !isAMDGCN(Triple)); 335 UseAddrSpaceMapMangling = true; 336 337 HasLegalHalfType = true; 338 HasFloat16 = true; 339 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 340 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 341 342 // Set pointer width and alignment for target address space 0. 343 PointerWidth = PointerAlign = getPointerWidthV(Generic); 344 if (getMaxPointerWidth() == 64) { 345 LongWidth = LongAlign = 64; 346 SizeType = UnsignedLong; 347 PtrDiffType = SignedLong; 348 IntPtrType = SignedLong; 349 } 350 351 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 352 } 353 354 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 355 TargetInfo::adjust(Opts); 356 // ToDo: There are still a few places using default address space as private 357 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 358 // can be removed from the following line. 359 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 360 !isAMDGCN(getTriple())); 361 } 362 363 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 364 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 365 Builtin::FirstTSBuiltin); 366 } 367 368 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 369 MacroBuilder &Builder) const { 370 Builder.defineMacro("__AMD__"); 371 Builder.defineMacro("__AMDGPU__"); 372 373 if (isAMDGCN(getTriple())) 374 Builder.defineMacro("__AMDGCN__"); 375 else 376 Builder.defineMacro("__R600__"); 377 378 if (GPUKind != llvm::AMDGPU::GK_NONE) { 379 StringRef CanonName = isAMDGCN(getTriple()) ? 380 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 381 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 382 if (isAMDGCN(getTriple())) { 383 Builder.defineMacro("__amdgcn_processor__", 384 Twine("\"") + Twine(CanonName) + Twine("\"")); 385 Builder.defineMacro("__amdgcn_target_id__", 386 Twine("\"") + Twine(getTargetID().getValue()) + 387 Twine("\"")); 388 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 389 auto Loc = OffloadArchFeatures.find(F); 390 if (Loc != OffloadArchFeatures.end()) { 391 std::string NewF = F.str(); 392 std::replace(NewF.begin(), NewF.end(), '-', '_'); 393 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 394 Twine("__"), 395 Loc->second ? "1" : "0"); 396 } 397 } 398 } 399 } 400 401 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 402 // removed in the near future. 403 if (hasFMAF()) 404 Builder.defineMacro("__HAS_FMAF__"); 405 if (hasFastFMAF()) 406 Builder.defineMacro("FP_FAST_FMAF"); 407 if (hasLDEXPF()) 408 Builder.defineMacro("__HAS_LDEXPF__"); 409 if (hasFP64()) 410 Builder.defineMacro("__HAS_FP64__"); 411 if (hasFastFMA()) 412 Builder.defineMacro("FP_FAST_FMA"); 413 414 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 415 } 416 417 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 418 assert(HalfFormat == Aux->HalfFormat); 419 assert(FloatFormat == Aux->FloatFormat); 420 assert(DoubleFormat == Aux->DoubleFormat); 421 422 // On x86_64 long double is 80-bit extended precision format, which is 423 // not supported by AMDGPU. 128-bit floating point format is also not 424 // supported by AMDGPU. Therefore keep its own format for these two types. 425 auto SaveLongDoubleFormat = LongDoubleFormat; 426 auto SaveFloat128Format = Float128Format; 427 copyAuxTarget(Aux); 428 LongDoubleFormat = SaveLongDoubleFormat; 429 Float128Format = SaveFloat128Format; 430 // For certain builtin types support on the host target, claim they are 431 // support to pass the compilation of the host code during the device-side 432 // compilation. 433 // FIXME: As the side effect, we also accept `__float128` uses in the device 434 // code. To rejct these builtin types supported in the host target but not in 435 // the device target, one approach would support `device_builtin` attribute 436 // so that we could tell the device builtin types from the host ones. The 437 // also solves the different representations of the same builtin type, such 438 // as `size_t` in the MSVC environment. 439 if (Aux->hasFloat128Type()) { 440 HasFloat128 = true; 441 Float128Format = DoubleFormat; 442 } 443 } 444