1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1033: 178 case GK_GFX1032: 179 case GK_GFX1031: 180 case GK_GFX1030: 181 Features["ci-insts"] = true; 182 Features["dot1-insts"] = true; 183 Features["dot2-insts"] = true; 184 Features["dot5-insts"] = true; 185 Features["dot6-insts"] = true; 186 Features["dot7-insts"] = true; 187 Features["dl-insts"] = true; 188 Features["flat-address-space"] = true; 189 Features["16-bit-insts"] = true; 190 Features["dpp"] = true; 191 Features["gfx8-insts"] = true; 192 Features["gfx9-insts"] = true; 193 Features["gfx10-insts"] = true; 194 Features["gfx10-3-insts"] = true; 195 Features["s-memrealtime"] = true; 196 Features["s-memtime-inst"] = true; 197 break; 198 case GK_GFX1012: 199 case GK_GFX1011: 200 Features["dot1-insts"] = true; 201 Features["dot2-insts"] = true; 202 Features["dot5-insts"] = true; 203 Features["dot6-insts"] = true; 204 Features["dot7-insts"] = true; 205 LLVM_FALLTHROUGH; 206 case GK_GFX1010: 207 Features["dl-insts"] = true; 208 Features["ci-insts"] = true; 209 Features["flat-address-space"] = true; 210 Features["16-bit-insts"] = true; 211 Features["dpp"] = true; 212 Features["gfx8-insts"] = true; 213 Features["gfx9-insts"] = true; 214 Features["gfx10-insts"] = true; 215 Features["s-memrealtime"] = true; 216 Features["s-memtime-inst"] = true; 217 break; 218 case GK_GFX90A: 219 Features["gfx90a-insts"] = true; 220 LLVM_FALLTHROUGH; 221 case GK_GFX908: 222 Features["dot3-insts"] = true; 223 Features["dot4-insts"] = true; 224 Features["dot5-insts"] = true; 225 Features["dot6-insts"] = true; 226 Features["mai-insts"] = true; 227 LLVM_FALLTHROUGH; 228 case GK_GFX906: 229 Features["dl-insts"] = true; 230 Features["dot1-insts"] = true; 231 Features["dot2-insts"] = true; 232 Features["dot7-insts"] = true; 233 LLVM_FALLTHROUGH; 234 case GK_GFX90C: 235 case GK_GFX909: 236 case GK_GFX904: 237 case GK_GFX902: 238 case GK_GFX900: 239 Features["gfx9-insts"] = true; 240 LLVM_FALLTHROUGH; 241 case GK_GFX810: 242 case GK_GFX805: 243 case GK_GFX803: 244 case GK_GFX802: 245 case GK_GFX801: 246 Features["gfx8-insts"] = true; 247 Features["16-bit-insts"] = true; 248 Features["dpp"] = true; 249 Features["s-memrealtime"] = true; 250 LLVM_FALLTHROUGH; 251 case GK_GFX705: 252 case GK_GFX704: 253 case GK_GFX703: 254 case GK_GFX702: 255 case GK_GFX701: 256 case GK_GFX700: 257 Features["ci-insts"] = true; 258 Features["flat-address-space"] = true; 259 LLVM_FALLTHROUGH; 260 case GK_GFX602: 261 case GK_GFX601: 262 case GK_GFX600: 263 Features["s-memtime-inst"] = true; 264 break; 265 case GK_NONE: 266 break; 267 default: 268 llvm_unreachable("Unhandled GPU!"); 269 } 270 } else { 271 if (CPU.empty()) 272 CPU = "r600"; 273 274 switch (llvm::AMDGPU::parseArchR600(CPU)) { 275 case GK_CAYMAN: 276 case GK_CYPRESS: 277 case GK_RV770: 278 case GK_RV670: 279 // TODO: Add fp64 when implemented. 280 break; 281 case GK_TURKS: 282 case GK_CAICOS: 283 case GK_BARTS: 284 case GK_SUMO: 285 case GK_REDWOOD: 286 case GK_JUNIPER: 287 case GK_CEDAR: 288 case GK_RV730: 289 case GK_RV710: 290 case GK_RS880: 291 case GK_R630: 292 case GK_R600: 293 break; 294 default: 295 llvm_unreachable("Unhandled GPU!"); 296 } 297 } 298 299 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 300 } 301 302 void AMDGPUTargetInfo::fillValidCPUList( 303 SmallVectorImpl<StringRef> &Values) const { 304 if (isAMDGCN(getTriple())) 305 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 306 else 307 llvm::AMDGPU::fillValidArchListR600(Values); 308 } 309 310 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 311 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 312 } 313 314 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 315 const TargetOptions &Opts) 316 : TargetInfo(Triple), 317 GPUKind(isAMDGCN(Triple) ? 318 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 319 llvm::AMDGPU::parseArchR600(Opts.CPU)), 320 GPUFeatures(isAMDGCN(Triple) ? 321 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 322 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 323 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 324 : DataLayoutStringR600); 325 assert(DataLayout->getAllocaAddrSpace() == Private); 326 GridValues = llvm::omp::AMDGPUGpuGridValues; 327 328 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 329 !isAMDGCN(Triple)); 330 UseAddrSpaceMapMangling = true; 331 332 HasLegalHalfType = true; 333 HasFloat16 = true; 334 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 335 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 336 337 // Set pointer width and alignment for target address space 0. 338 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 339 if (getMaxPointerWidth() == 64) { 340 LongWidth = LongAlign = 64; 341 SizeType = UnsignedLong; 342 PtrDiffType = SignedLong; 343 IntPtrType = SignedLong; 344 } 345 346 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 347 } 348 349 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 350 TargetInfo::adjust(Opts); 351 // ToDo: There are still a few places using default address space as private 352 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 353 // can be removed from the following line. 354 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 355 !isAMDGCN(getTriple())); 356 } 357 358 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 359 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 360 Builtin::FirstTSBuiltin); 361 } 362 363 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 364 MacroBuilder &Builder) const { 365 Builder.defineMacro("__AMD__"); 366 Builder.defineMacro("__AMDGPU__"); 367 368 if (isAMDGCN(getTriple())) 369 Builder.defineMacro("__AMDGCN__"); 370 else 371 Builder.defineMacro("__R600__"); 372 373 if (GPUKind != llvm::AMDGPU::GK_NONE) { 374 StringRef CanonName = isAMDGCN(getTriple()) ? 375 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 376 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 377 if (isAMDGCN(getTriple())) { 378 Builder.defineMacro("__amdgcn_processor__", 379 Twine("\"") + Twine(CanonName) + Twine("\"")); 380 Builder.defineMacro("__amdgcn_target_id__", 381 Twine("\"") + Twine(getTargetID().getValue()) + 382 Twine("\"")); 383 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 384 auto Loc = OffloadArchFeatures.find(F); 385 if (Loc != OffloadArchFeatures.end()) { 386 std::string NewF = F.str(); 387 std::replace(NewF.begin(), NewF.end(), '-', '_'); 388 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 389 Twine("__"), 390 Loc->second ? "1" : "0"); 391 } 392 } 393 } 394 } 395 396 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 397 // removed in the near future. 398 if (hasFMAF()) 399 Builder.defineMacro("__HAS_FMAF__"); 400 if (hasFastFMAF()) 401 Builder.defineMacro("FP_FAST_FMAF"); 402 if (hasLDEXPF()) 403 Builder.defineMacro("__HAS_LDEXPF__"); 404 if (hasFP64()) 405 Builder.defineMacro("__HAS_FP64__"); 406 if (hasFastFMA()) 407 Builder.defineMacro("FP_FAST_FMA"); 408 409 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 410 } 411 412 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 413 assert(HalfFormat == Aux->HalfFormat); 414 assert(FloatFormat == Aux->FloatFormat); 415 assert(DoubleFormat == Aux->DoubleFormat); 416 417 // On x86_64 long double is 80-bit extended precision format, which is 418 // not supported by AMDGPU. 128-bit floating point format is also not 419 // supported by AMDGPU. Therefore keep its own format for these two types. 420 auto SaveLongDoubleFormat = LongDoubleFormat; 421 auto SaveFloat128Format = Float128Format; 422 copyAuxTarget(Aux); 423 LongDoubleFormat = SaveLongDoubleFormat; 424 Float128Format = SaveFloat128Format; 425 // For certain builtin types support on the host target, claim they are 426 // support to pass the compilation of the host code during the device-side 427 // compilation. 428 // FIXME: As the side effect, we also accept `__float128` uses in the device 429 // code. To rejct these builtin types supported in the host target but not in 430 // the device target, one approach would support `device_builtin` attribute 431 // so that we could tell the device builtin types from the host ones. The 432 // also solves the different representations of the same builtin type, such 433 // as `size_t` in the MSVC environment. 434 if (Aux->hasFloat128Type()) { 435 HasFloat128 = true; 436 Float128Format = DoubleFormat; 437 } 438 } 439