1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1033: 178 case GK_GFX1032: 179 case GK_GFX1031: 180 case GK_GFX1030: 181 Features["ci-insts"] = true; 182 Features["dot1-insts"] = true; 183 Features["dot2-insts"] = true; 184 Features["dot5-insts"] = true; 185 Features["dot6-insts"] = true; 186 Features["dl-insts"] = true; 187 Features["flat-address-space"] = true; 188 Features["16-bit-insts"] = true; 189 Features["dpp"] = true; 190 Features["gfx8-insts"] = true; 191 Features["gfx9-insts"] = true; 192 Features["gfx10-insts"] = true; 193 Features["gfx10-3-insts"] = true; 194 Features["s-memrealtime"] = true; 195 break; 196 case GK_GFX1012: 197 case GK_GFX1011: 198 Features["dot1-insts"] = true; 199 Features["dot2-insts"] = true; 200 Features["dot5-insts"] = true; 201 Features["dot6-insts"] = true; 202 LLVM_FALLTHROUGH; 203 case GK_GFX1010: 204 Features["dl-insts"] = true; 205 Features["ci-insts"] = true; 206 Features["flat-address-space"] = true; 207 Features["16-bit-insts"] = true; 208 Features["dpp"] = true; 209 Features["gfx8-insts"] = true; 210 Features["gfx9-insts"] = true; 211 Features["gfx10-insts"] = true; 212 Features["s-memrealtime"] = true; 213 Features["s-memtime-inst"] = true; 214 break; 215 case GK_GFX90A: 216 Features["gfx90a-insts"] = true; 217 LLVM_FALLTHROUGH; 218 case GK_GFX908: 219 Features["dot3-insts"] = true; 220 Features["dot4-insts"] = true; 221 Features["dot5-insts"] = true; 222 Features["dot6-insts"] = true; 223 Features["mai-insts"] = true; 224 LLVM_FALLTHROUGH; 225 case GK_GFX906: 226 Features["dl-insts"] = true; 227 Features["dot1-insts"] = true; 228 Features["dot2-insts"] = true; 229 LLVM_FALLTHROUGH; 230 case GK_GFX90C: 231 case GK_GFX909: 232 case GK_GFX904: 233 case GK_GFX902: 234 case GK_GFX900: 235 Features["gfx9-insts"] = true; 236 LLVM_FALLTHROUGH; 237 case GK_GFX810: 238 case GK_GFX805: 239 case GK_GFX803: 240 case GK_GFX802: 241 case GK_GFX801: 242 Features["gfx8-insts"] = true; 243 Features["16-bit-insts"] = true; 244 Features["dpp"] = true; 245 Features["s-memrealtime"] = true; 246 LLVM_FALLTHROUGH; 247 case GK_GFX705: 248 case GK_GFX704: 249 case GK_GFX703: 250 case GK_GFX702: 251 case GK_GFX701: 252 case GK_GFX700: 253 Features["ci-insts"] = true; 254 Features["flat-address-space"] = true; 255 LLVM_FALLTHROUGH; 256 case GK_GFX602: 257 case GK_GFX601: 258 case GK_GFX600: 259 Features["s-memtime-inst"] = true; 260 break; 261 case GK_NONE: 262 break; 263 default: 264 llvm_unreachable("Unhandled GPU!"); 265 } 266 } else { 267 if (CPU.empty()) 268 CPU = "r600"; 269 270 switch (llvm::AMDGPU::parseArchR600(CPU)) { 271 case GK_CAYMAN: 272 case GK_CYPRESS: 273 case GK_RV770: 274 case GK_RV670: 275 // TODO: Add fp64 when implemented. 276 break; 277 case GK_TURKS: 278 case GK_CAICOS: 279 case GK_BARTS: 280 case GK_SUMO: 281 case GK_REDWOOD: 282 case GK_JUNIPER: 283 case GK_CEDAR: 284 case GK_RV730: 285 case GK_RV710: 286 case GK_RS880: 287 case GK_R630: 288 case GK_R600: 289 break; 290 default: 291 llvm_unreachable("Unhandled GPU!"); 292 } 293 } 294 295 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 296 } 297 298 void AMDGPUTargetInfo::fillValidCPUList( 299 SmallVectorImpl<StringRef> &Values) const { 300 if (isAMDGCN(getTriple())) 301 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 302 else 303 llvm::AMDGPU::fillValidArchListR600(Values); 304 } 305 306 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 307 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 308 } 309 310 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 311 const TargetOptions &Opts) 312 : TargetInfo(Triple), 313 GPUKind(isAMDGCN(Triple) ? 314 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 315 llvm::AMDGPU::parseArchR600(Opts.CPU)), 316 GPUFeatures(isAMDGCN(Triple) ? 317 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 318 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 319 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 320 : DataLayoutStringR600); 321 assert(DataLayout->getAllocaAddrSpace() == Private); 322 GridValues = llvm::omp::AMDGPUGpuGridValues; 323 324 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 325 !isAMDGCN(Triple)); 326 UseAddrSpaceMapMangling = true; 327 328 HasLegalHalfType = true; 329 HasFloat16 = true; 330 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 331 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 332 333 // Set pointer width and alignment for target address space 0. 334 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 335 if (getMaxPointerWidth() == 64) { 336 LongWidth = LongAlign = 64; 337 SizeType = UnsignedLong; 338 PtrDiffType = SignedLong; 339 IntPtrType = SignedLong; 340 } 341 342 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 343 } 344 345 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 346 TargetInfo::adjust(Opts); 347 // ToDo: There are still a few places using default address space as private 348 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 349 // can be removed from the following line. 350 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 351 !isAMDGCN(getTriple())); 352 } 353 354 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 355 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 356 Builtin::FirstTSBuiltin); 357 } 358 359 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 360 MacroBuilder &Builder) const { 361 Builder.defineMacro("__AMD__"); 362 Builder.defineMacro("__AMDGPU__"); 363 364 if (isAMDGCN(getTriple())) 365 Builder.defineMacro("__AMDGCN__"); 366 else 367 Builder.defineMacro("__R600__"); 368 369 if (GPUKind != llvm::AMDGPU::GK_NONE) { 370 StringRef CanonName = isAMDGCN(getTriple()) ? 371 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 372 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 373 if (isAMDGCN(getTriple())) { 374 Builder.defineMacro("__amdgcn_processor__", 375 Twine("\"") + Twine(CanonName) + Twine("\"")); 376 Builder.defineMacro("__amdgcn_target_id__", 377 Twine("\"") + Twine(getTargetID().getValue()) + 378 Twine("\"")); 379 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 380 auto Loc = OffloadArchFeatures.find(F); 381 if (Loc != OffloadArchFeatures.end()) { 382 std::string NewF = F.str(); 383 std::replace(NewF.begin(), NewF.end(), '-', '_'); 384 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 385 Twine("__"), 386 Loc->second ? "1" : "0"); 387 } 388 } 389 } 390 } 391 392 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 393 // removed in the near future. 394 if (hasFMAF()) 395 Builder.defineMacro("__HAS_FMAF__"); 396 if (hasFastFMAF()) 397 Builder.defineMacro("FP_FAST_FMAF"); 398 if (hasLDEXPF()) 399 Builder.defineMacro("__HAS_LDEXPF__"); 400 if (hasFP64()) 401 Builder.defineMacro("__HAS_FP64__"); 402 if (hasFastFMA()) 403 Builder.defineMacro("FP_FAST_FMA"); 404 405 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 406 } 407 408 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 409 assert(HalfFormat == Aux->HalfFormat); 410 assert(FloatFormat == Aux->FloatFormat); 411 assert(DoubleFormat == Aux->DoubleFormat); 412 413 // On x86_64 long double is 80-bit extended precision format, which is 414 // not supported by AMDGPU. 128-bit floating point format is also not 415 // supported by AMDGPU. Therefore keep its own format for these two types. 416 auto SaveLongDoubleFormat = LongDoubleFormat; 417 auto SaveFloat128Format = Float128Format; 418 copyAuxTarget(Aux); 419 LongDoubleFormat = SaveLongDoubleFormat; 420 Float128Format = SaveFloat128Format; 421 // For certain builtin types support on the host target, claim they are 422 // support to pass the compilation of the host code during the device-side 423 // compilation. 424 // FIXME: As the side effect, we also accept `__float128` uses in the device 425 // code. To rejct these builtin types supported in the host target but not in 426 // the device target, one approach would support `device_builtin` attribute 427 // so that we could tell the device builtin types from the host ones. The 428 // also solves the different representations of the same builtin type, such 429 // as `size_t` in the MSVC environment. 430 if (Aux->hasFloat128Type()) { 431 HasFloat128 = true; 432 Float128Format = DoubleFormat; 433 } 434 } 435