1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1033: 178 case GK_GFX1032: 179 case GK_GFX1031: 180 case GK_GFX1030: 181 Features["ci-insts"] = true; 182 Features["dot1-insts"] = true; 183 Features["dot2-insts"] = true; 184 Features["dot5-insts"] = true; 185 Features["dot6-insts"] = true; 186 Features["dl-insts"] = true; 187 Features["flat-address-space"] = true; 188 Features["16-bit-insts"] = true; 189 Features["dpp"] = true; 190 Features["gfx8-insts"] = true; 191 Features["gfx9-insts"] = true; 192 Features["gfx10-insts"] = true; 193 Features["gfx10-3-insts"] = true; 194 Features["s-memrealtime"] = true; 195 Features["s-memtime-inst"] = true; 196 break; 197 case GK_GFX1012: 198 case GK_GFX1011: 199 Features["dot1-insts"] = true; 200 Features["dot2-insts"] = true; 201 Features["dot5-insts"] = true; 202 Features["dot6-insts"] = true; 203 LLVM_FALLTHROUGH; 204 case GK_GFX1010: 205 Features["dl-insts"] = true; 206 Features["ci-insts"] = true; 207 Features["flat-address-space"] = true; 208 Features["16-bit-insts"] = true; 209 Features["dpp"] = true; 210 Features["gfx8-insts"] = true; 211 Features["gfx9-insts"] = true; 212 Features["gfx10-insts"] = true; 213 Features["s-memrealtime"] = true; 214 Features["s-memtime-inst"] = true; 215 break; 216 case GK_GFX90A: 217 Features["gfx90a-insts"] = true; 218 LLVM_FALLTHROUGH; 219 case GK_GFX908: 220 Features["dot3-insts"] = true; 221 Features["dot4-insts"] = true; 222 Features["dot5-insts"] = true; 223 Features["dot6-insts"] = true; 224 Features["mai-insts"] = true; 225 LLVM_FALLTHROUGH; 226 case GK_GFX906: 227 Features["dl-insts"] = true; 228 Features["dot1-insts"] = true; 229 Features["dot2-insts"] = true; 230 LLVM_FALLTHROUGH; 231 case GK_GFX90C: 232 case GK_GFX909: 233 case GK_GFX904: 234 case GK_GFX902: 235 case GK_GFX900: 236 Features["gfx9-insts"] = true; 237 LLVM_FALLTHROUGH; 238 case GK_GFX810: 239 case GK_GFX805: 240 case GK_GFX803: 241 case GK_GFX802: 242 case GK_GFX801: 243 Features["gfx8-insts"] = true; 244 Features["16-bit-insts"] = true; 245 Features["dpp"] = true; 246 Features["s-memrealtime"] = true; 247 LLVM_FALLTHROUGH; 248 case GK_GFX705: 249 case GK_GFX704: 250 case GK_GFX703: 251 case GK_GFX702: 252 case GK_GFX701: 253 case GK_GFX700: 254 Features["ci-insts"] = true; 255 Features["flat-address-space"] = true; 256 LLVM_FALLTHROUGH; 257 case GK_GFX602: 258 case GK_GFX601: 259 case GK_GFX600: 260 Features["s-memtime-inst"] = true; 261 break; 262 case GK_NONE: 263 break; 264 default: 265 llvm_unreachable("Unhandled GPU!"); 266 } 267 } else { 268 if (CPU.empty()) 269 CPU = "r600"; 270 271 switch (llvm::AMDGPU::parseArchR600(CPU)) { 272 case GK_CAYMAN: 273 case GK_CYPRESS: 274 case GK_RV770: 275 case GK_RV670: 276 // TODO: Add fp64 when implemented. 277 break; 278 case GK_TURKS: 279 case GK_CAICOS: 280 case GK_BARTS: 281 case GK_SUMO: 282 case GK_REDWOOD: 283 case GK_JUNIPER: 284 case GK_CEDAR: 285 case GK_RV730: 286 case GK_RV710: 287 case GK_RS880: 288 case GK_R630: 289 case GK_R600: 290 break; 291 default: 292 llvm_unreachable("Unhandled GPU!"); 293 } 294 } 295 296 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 297 } 298 299 void AMDGPUTargetInfo::fillValidCPUList( 300 SmallVectorImpl<StringRef> &Values) const { 301 if (isAMDGCN(getTriple())) 302 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 303 else 304 llvm::AMDGPU::fillValidArchListR600(Values); 305 } 306 307 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 308 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 309 } 310 311 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 312 const TargetOptions &Opts) 313 : TargetInfo(Triple), 314 GPUKind(isAMDGCN(Triple) ? 315 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 316 llvm::AMDGPU::parseArchR600(Opts.CPU)), 317 GPUFeatures(isAMDGCN(Triple) ? 318 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 319 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 320 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 321 : DataLayoutStringR600); 322 assert(DataLayout->getAllocaAddrSpace() == Private); 323 GridValues = llvm::omp::AMDGPUGpuGridValues; 324 325 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 326 !isAMDGCN(Triple)); 327 UseAddrSpaceMapMangling = true; 328 329 HasLegalHalfType = true; 330 HasFloat16 = true; 331 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 332 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 333 334 // Set pointer width and alignment for target address space 0. 335 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 336 if (getMaxPointerWidth() == 64) { 337 LongWidth = LongAlign = 64; 338 SizeType = UnsignedLong; 339 PtrDiffType = SignedLong; 340 IntPtrType = SignedLong; 341 } 342 343 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 344 } 345 346 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 347 TargetInfo::adjust(Opts); 348 // ToDo: There are still a few places using default address space as private 349 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 350 // can be removed from the following line. 351 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 352 !isAMDGCN(getTriple())); 353 } 354 355 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 356 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 357 Builtin::FirstTSBuiltin); 358 } 359 360 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 361 MacroBuilder &Builder) const { 362 Builder.defineMacro("__AMD__"); 363 Builder.defineMacro("__AMDGPU__"); 364 365 if (isAMDGCN(getTriple())) 366 Builder.defineMacro("__AMDGCN__"); 367 else 368 Builder.defineMacro("__R600__"); 369 370 if (GPUKind != llvm::AMDGPU::GK_NONE) { 371 StringRef CanonName = isAMDGCN(getTriple()) ? 372 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 373 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 374 if (isAMDGCN(getTriple())) { 375 Builder.defineMacro("__amdgcn_processor__", 376 Twine("\"") + Twine(CanonName) + Twine("\"")); 377 Builder.defineMacro("__amdgcn_target_id__", 378 Twine("\"") + Twine(getTargetID().getValue()) + 379 Twine("\"")); 380 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 381 auto Loc = OffloadArchFeatures.find(F); 382 if (Loc != OffloadArchFeatures.end()) { 383 std::string NewF = F.str(); 384 std::replace(NewF.begin(), NewF.end(), '-', '_'); 385 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 386 Twine("__"), 387 Loc->second ? "1" : "0"); 388 } 389 } 390 } 391 } 392 393 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 394 // removed in the near future. 395 if (hasFMAF()) 396 Builder.defineMacro("__HAS_FMAF__"); 397 if (hasFastFMAF()) 398 Builder.defineMacro("FP_FAST_FMAF"); 399 if (hasLDEXPF()) 400 Builder.defineMacro("__HAS_LDEXPF__"); 401 if (hasFP64()) 402 Builder.defineMacro("__HAS_FP64__"); 403 if (hasFastFMA()) 404 Builder.defineMacro("FP_FAST_FMA"); 405 406 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 407 } 408 409 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 410 assert(HalfFormat == Aux->HalfFormat); 411 assert(FloatFormat == Aux->FloatFormat); 412 assert(DoubleFormat == Aux->DoubleFormat); 413 414 // On x86_64 long double is 80-bit extended precision format, which is 415 // not supported by AMDGPU. 128-bit floating point format is also not 416 // supported by AMDGPU. Therefore keep its own format for these two types. 417 auto SaveLongDoubleFormat = LongDoubleFormat; 418 auto SaveFloat128Format = Float128Format; 419 copyAuxTarget(Aux); 420 LongDoubleFormat = SaveLongDoubleFormat; 421 Float128Format = SaveFloat128Format; 422 // For certain builtin types support on the host target, claim they are 423 // support to pass the compilation of the host code during the device-side 424 // compilation. 425 // FIXME: As the side effect, we also accept `__float128` uses in the device 426 // code. To rejct these builtin types supported in the host target but not in 427 // the device target, one approach would support `device_builtin` attribute 428 // so that we could tell the device builtin types from the host ones. The 429 // also solves the different representations of the same builtin type, such 430 // as `size_t` in the MSVC environment. 431 if (Aux->hasFloat128Type()) { 432 HasFloat128 = true; 433 Float128Format = DoubleFormat; 434 } 435 } 436