//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/DataLayout.h"

using namespace clang;
using namespace clang::targets;

namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout installed by the constructor when the triple is not amdgcn.
// The constructor asserts that the alloca address space parsed from this
// string equals Private.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

// Data layout installed by the constructor when the triple is amdgcn. The
// same alloca-address-space assertion applies.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7";

// Language-address-space map chosen by setAddressSpaceMap() when
// DefaultIsPrivate is false: the language "Default" address space maps to
// Generic. Entries are positional; the trailing comment on each line names
// the language address space that slot corresponds to, so the order must not
// be changed.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Global,   // sycl_global
    Local,    // sycl_local
    Private,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};

// Language-address-space map chosen by setAddressSpaceMap() when
// DefaultIsPrivate is true: the language "Default" address space maps to
// Private. Entries are positional, in the same slot order as the map above.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    // SYCL address space values for this map are dummy
    Generic,  // sycl_global
    Generic,  // sycl_local
    Generic,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
} // namespace targets
} // namespace clang

// Table of target builtins, expanded from BuiltinsAMDGPU.def. BUILTIN entries
// get nullptr in the last slot, while TARGET_BUILTIN entries carry the FEATURE
// string supplied by the .def file in that slot.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};

// Register names exposed through getGCCRegNames(). In order: v0-v255,
// s0-s127, a group of individually named registers (exec, vcc, scc, m0,
// flat_scratch and their _lo/_hi halves), then a0-a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};

// Returns a view over the complete GCCRegNames table.
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  return llvm::makeArrayRef(GCCRegNames);
}

// Seeds Features with the defaults implied by CPU, then hands off to
// TargetInfo::initFeatureMap with the same Features, Diags, CPU and
// FeatureVec, returning its result.
//
// For amdgcn triples CPU is parsed with parseArchAMDGCN. The GFX9-and-older
// part of the switch is a deliberate fallthrough cascade: each case group
// enables its own features and then falls into the next group, so a processor
// accumulates everything listed below its own label down to the first
// `break`. The GFX10 groups list their features explicitly instead. GK_NONE
// enables nothing, and any other value is treated as unreachable.
//
// For other triples an empty CPU is replaced by "r600" and parsed with
// parseArchR600. None of the recognised kinds enables a feature here;
// anything unrecognised is treated as unreachable.
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  using namespace llvm::AMDGPU;

  // XXX - What does the member GPU mean if device name string passed here?
  if (isAMDGCN(getTriple())) {
    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    case GK_GFX1033:
    case GK_GFX1032:
    case GK_GFX1031:
    case GK_GFX1030:
      // Self-contained group: ends in `break`, no fallthrough.
      Features["ci-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      Features["dl-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["gfx10-3-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      break;
    case GK_GFX1012:
    case GK_GFX1011:
      // Dot-product features on top of everything GK_GFX1010 enables.
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["dot7-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX1010:
      Features["dl-insts"] = true;
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["s-memrealtime"] = true;
      Features["s-memtime-inst"] = true;
      break;
    // Start of the cumulative cascade; it runs until the `break` after the
    // GK_GFX60x labels.
    case GK_GFX90A:
      Features["gfx90a-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX908:
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      Features["mai-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX906:
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot7-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX90C:
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX805:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX705:
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX602:
    case GK_GFX601:
    case GK_GFX600:
      Features["s-memtime-inst"] = true;
      break;
    case GK_NONE:
      // No processor recognised: leave Features untouched.
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    if (CPU.empty())
      CPU = "r600";

    switch (llvm::AMDGPU::parseArchR600(CPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}

// Appends the valid processor names to Values, using the AMDGCN list for
// amdgcn triples and the R600 list otherwise.
void AMDGPUTargetInfo::fillValidCPUList(
    SmallVectorImpl<StringRef> &Values) const {
  if (isAMDGCN(getTriple()))
    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  else
    llvm::AMDGPU::fillValidArchListR600(Values);
}

// Points AddrSpaceMap at AMDGPUDefIsPrivMap when DefaultIsPrivate is true and
// at AMDGPUDefIsGenMap otherwise.
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
}

// Builds the target description for Triple. GPUKind is parsed from Opts.CPU
// and GPUFeatures is looked up from GPUKind, each with the AMDGCN or R600
// variant of the helper depending on the triple.
AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      GPUKind(isAMDGCN(Triple) ?
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
              llvm::AMDGPU::parseArchR600(Opts.CPU)),
      GPUFeatures(isAMDGCN(Triple) ?
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);
  // Both layout strings must agree with the Private address space value.
  assert(DataLayout->getAllocaAddrSpace() == Private);
  GridValues = llvm::omp::AMDGPUGpuGridValues;

  // The default address space is private for the Mesa3D OS and for every
  // non-amdgcn triple; adjust() may re-select the map later.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  HasLegalHalfType = true;
  HasFloat16 = true;
  // 32 when the FEATURE_WAVE32 bit is set in GPUFeatures, otherwise 64.
  WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
  AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;

  // Set pointer width and alignment for target address space 0.
  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
  if (getMaxPointerWidth() == 64) {
    // With 64-bit pointers, `long` becomes 64 bits wide and is used for the
    // size, pointer-difference and integer-pointer types.
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}

// Applies the base-class adjustment for Opts, then re-selects the address
// space map: the default address space is private when compiling OpenCL or
// when the triple is not amdgcn.
void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
  TargetInfo::adjust(Opts);
  // ToDo: There are still a few places using default address space as private
  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  // can be removed from the following line.
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
                     !isAMDGCN(getTriple()));
}

// Returns a view over BuiltinInfo whose length is the distance between
// Builtin::FirstTSBuiltin and clang::AMDGPU::LastTSBuiltin.
ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
                                             Builtin::FirstTSBuiltin);
}

// Emits the predefined macros for this target into Builder:
//  - __AMD__ and __AMDGPU__ always, plus __AMDGCN__ or __R600__ by triple;
//  - when a GPU kind is known, __<canonical-name>__ and, for amdgcn only,
//    __amdgcn_processor__, __amdgcn_target_id__ and one
//    __amdgcn_feature_<name>__ macro (value 1 or 0) for each possible
//    target-ID feature that has an entry in OffloadArchFeatures;
//  - capability macros gated on hasFMAF/hasFastFMAF/hasLDEXPF/hasFP64/
//    hasFastFMA;
//  - __AMDGCN_WAVEFRONT_SIZE, set to WavefrontSize.
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
  Builder.defineMacro("__AMD__");
  Builder.defineMacro("__AMDGPU__");

  if (isAMDGCN(getTriple()))
    Builder.defineMacro("__AMDGCN__");
  else
    Builder.defineMacro("__R600__");

  if (GPUKind != llvm::AMDGPU::GK_NONE) {
    StringRef CanonName = isAMDGCN(getTriple()) ?
      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
    if (isAMDGCN(getTriple())) {
      // The processor name and target ID are emitted as quoted string
      // literals.
      Builder.defineMacro("__amdgcn_processor__",
                          Twine("\"") + Twine(CanonName) + Twine("\""));
      Builder.defineMacro("__amdgcn_target_id__",
                          Twine("\"") + Twine(getTargetID().getValue()) +
                              Twine("\""));
      for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
        // Only features present in OffloadArchFeatures get a macro.
        auto Loc = OffloadArchFeatures.find(F);
        if (Loc != OffloadArchFeatures.end()) {
          // Replace '-' with '_' so the feature name is usable inside a
          // macro identifier.
          std::string NewF = F.str();
          std::replace(NewF.begin(), NewF.end(), '-', '_');
          Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
                                  Twine("__"),
                              Loc->second ? "1" : "0");
        }
      }
    }
  }

  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  // removed in the near future.
  if (hasFMAF())
    Builder.defineMacro("__HAS_FMAF__");
  if (hasFastFMAF())
    Builder.defineMacro("FP_FAST_FMAF");
  if (hasLDEXPF())
    Builder.defineMacro("__HAS_LDEXPF__");
  if (hasFP64())
    Builder.defineMacro("__HAS_FP64__");
  if (hasFastFMA())
    Builder.defineMacro("FP_FAST_FMA");

  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
}

// Copies settings from the auxiliary target Aux via copyAuxTarget while
// keeping this target's own long double and float128 formats. The half, float
// and double formats are asserted to match Aux beforehand. If Aux has a
// float128 type, this target also claims one and uses DoubleFormat for it.
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
  assert(HalfFormat == Aux->HalfFormat);
  assert(FloatFormat == Aux->FloatFormat);
  assert(DoubleFormat == Aux->DoubleFormat);

  // On x86_64 long double is 80-bit extended precision format, which is
  // not supported by AMDGPU. 128-bit floating point format is also not
  // supported by AMDGPU. Therefore keep its own format for these two types.
  auto SaveLongDoubleFormat = LongDoubleFormat;
  auto SaveFloat128Format = Float128Format;
  copyAuxTarget(Aux);
  LongDoubleFormat = SaveLongDoubleFormat;
  Float128Format = SaveFloat128Format;
  // For certain builtin types supported on the host target, claim they are
  // supported so that the host code passes compilation during the device-side
  // compilation.
  // FIXME: As a side effect, we also accept `__float128` uses in the device
  // code. To reject these builtin types supported in the host target but not
  // in the device target, one approach would be to support a `device_builtin`
  // attribute so that we could tell the device builtin types from the host
  // ones. That also solves the different representations of the same builtin
  // type, such as `size_t` in the MSVC environment.
  if (Aux->hasFloat128Type()) {
    HasFloat128 = true;
    Float128Format = DoubleFormat;
  }
}