1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1031: 178 case GK_GFX1030: 179 Features["ci-insts"] = true; 180 Features["dot1-insts"] = true; 181 Features["dot2-insts"] = true; 182 Features["dot5-insts"] = true; 183 Features["dot6-insts"] = true; 184 Features["dl-insts"] = true; 185 Features["flat-address-space"] = true; 186 Features["16-bit-insts"] = true; 187 Features["dpp"] = true; 188 Features["gfx8-insts"] = true; 189 Features["gfx9-insts"] = true; 190 Features["gfx10-insts"] = true; 191 Features["gfx10-3-insts"] = true; 192 Features["s-memrealtime"] = true; 193 break; 194 case GK_GFX1012: 195 case GK_GFX1011: 196 Features["dot1-insts"] = true; 197 Features["dot2-insts"] = true; 198 Features["dot5-insts"] = true; 199 Features["dot6-insts"] = true; 200 LLVM_FALLTHROUGH; 201 case GK_GFX1010: 202 Features["dl-insts"] = true; 203 Features["ci-insts"] = true; 204 Features["flat-address-space"] = true; 205 Features["16-bit-insts"] = true; 206 Features["dpp"] = true; 207 Features["gfx8-insts"] = true; 208 Features["gfx9-insts"] = true; 209 Features["gfx10-insts"] = true; 210 Features["s-memrealtime"] = true; 211 break; 212 case GK_GFX908: 213 Features["dot3-insts"] = true; 214 Features["dot4-insts"] = true; 215 Features["dot5-insts"] = true; 216 Features["dot6-insts"] = true; 217 Features["mai-insts"] = true; 218 LLVM_FALLTHROUGH; 219 case GK_GFX906: 220 Features["dl-insts"] = true; 221 Features["dot1-insts"] = true; 222 Features["dot2-insts"] = true; 223 LLVM_FALLTHROUGH; 224 case GK_GFX909: 225 case GK_GFX904: 226 case GK_GFX902: 227 case GK_GFX900: 228 Features["gfx9-insts"] = true; 229 LLVM_FALLTHROUGH; 230 case GK_GFX810: 231 case GK_GFX805: 232 case GK_GFX803: 233 case GK_GFX802: 234 case GK_GFX801: 235 Features["gfx8-insts"] = true; 236 Features["16-bit-insts"] = true; 237 Features["dpp"] = true; 238 Features["s-memrealtime"] = true; 239 LLVM_FALLTHROUGH; 240 case GK_GFX705: 241 case GK_GFX704: 242 case GK_GFX703: 243 case GK_GFX702: 244 case GK_GFX701: 245 case GK_GFX700: 246 Features["ci-insts"] = true; 247 Features["flat-address-space"] = true; 248 LLVM_FALLTHROUGH; 249 case GK_GFX602: 250 case GK_GFX601: 251 case GK_GFX600: 252 break; 253 case GK_NONE: 254 break; 255 default: 256 llvm_unreachable("Unhandled GPU!"); 257 } 258 } else { 259 if (CPU.empty()) 260 CPU = "r600"; 261 262 switch (llvm::AMDGPU::parseArchR600(CPU)) { 263 case GK_CAYMAN: 264 case GK_CYPRESS: 265 case GK_RV770: 266 case GK_RV670: 267 // TODO: Add fp64 when implemented. 268 break; 269 case GK_TURKS: 270 case GK_CAICOS: 271 case GK_BARTS: 272 case GK_SUMO: 273 case GK_REDWOOD: 274 case GK_JUNIPER: 275 case GK_CEDAR: 276 case GK_RV730: 277 case GK_RV710: 278 case GK_RS880: 279 case GK_R630: 280 case GK_R600: 281 break; 282 default: 283 llvm_unreachable("Unhandled GPU!"); 284 } 285 } 286 287 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 288 } 289 290 void AMDGPUTargetInfo::fillValidCPUList( 291 SmallVectorImpl<StringRef> &Values) const { 292 if (isAMDGCN(getTriple())) 293 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 294 else 295 llvm::AMDGPU::fillValidArchListR600(Values); 296 } 297 298 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 299 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 300 } 301 302 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 303 const TargetOptions &Opts) 304 : TargetInfo(Triple), 305 GPUKind(isAMDGCN(Triple) ? 306 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 307 llvm::AMDGPU::parseArchR600(Opts.CPU)), 308 GPUFeatures(isAMDGCN(Triple) ? 309 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 310 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 311 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 312 : DataLayoutStringR600); 313 assert(DataLayout->getAllocaAddrSpace() == Private); 314 GridValues = llvm::omp::AMDGPUGpuGridValues; 315 316 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 317 !isAMDGCN(Triple)); 318 UseAddrSpaceMapMangling = true; 319 320 HasLegalHalfType = true; 321 HasFloat16 = true; 322 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 323 324 // Set pointer width and alignment for target address space 0. 325 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 326 if (getMaxPointerWidth() == 64) { 327 LongWidth = LongAlign = 64; 328 SizeType = UnsignedLong; 329 PtrDiffType = SignedLong; 330 IntPtrType = SignedLong; 331 } 332 333 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 334 } 335 336 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 337 TargetInfo::adjust(Opts); 338 // ToDo: There are still a few places using default address space as private 339 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 340 // can be removed from the following line. 341 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 342 !isAMDGCN(getTriple())); 343 } 344 345 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 346 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 347 Builtin::FirstTSBuiltin); 348 } 349 350 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 351 MacroBuilder &Builder) const { 352 Builder.defineMacro("__AMD__"); 353 Builder.defineMacro("__AMDGPU__"); 354 355 if (isAMDGCN(getTriple())) 356 Builder.defineMacro("__AMDGCN__"); 357 else 358 Builder.defineMacro("__R600__"); 359 360 if (GPUKind != llvm::AMDGPU::GK_NONE) { 361 StringRef CanonName = isAMDGCN(getTriple()) ? 362 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 363 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 364 if (isAMDGCN(getTriple())) { 365 Builder.defineMacro("__amdgcn_processor__", 366 Twine("\"") + Twine(CanonName) + Twine("\"")); 367 Builder.defineMacro("__amdgcn_target_id__", 368 Twine("\"") + Twine(getTargetID().getValue()) + 369 Twine("\"")); 370 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 371 auto Loc = OffloadArchFeatures.find(F); 372 if (Loc != OffloadArchFeatures.end()) { 373 std::string NewF = F.str(); 374 std::replace(NewF.begin(), NewF.end(), '-', '_'); 375 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 376 Twine("__"), 377 Loc->second ? "1" : "0"); 378 } 379 } 380 } 381 } 382 383 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 384 // removed in the near future. 385 if (hasFMAF()) 386 Builder.defineMacro("__HAS_FMAF__"); 387 if (hasFastFMAF()) 388 Builder.defineMacro("FP_FAST_FMAF"); 389 if (hasLDEXPF()) 390 Builder.defineMacro("__HAS_LDEXPF__"); 391 if (hasFP64()) 392 Builder.defineMacro("__HAS_FP64__"); 393 if (hasFastFMA()) 394 Builder.defineMacro("FP_FAST_FMA"); 395 396 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 397 } 398 399 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 400 assert(HalfFormat == Aux->HalfFormat); 401 assert(FloatFormat == Aux->FloatFormat); 402 assert(DoubleFormat == Aux->DoubleFormat); 403 404 // On x86_64 long double is 80-bit extended precision format, which is 405 // not supported by AMDGPU. 128-bit floating point format is also not 406 // supported by AMDGPU. Therefore keep its own format for these two types. 407 auto SaveLongDoubleFormat = LongDoubleFormat; 408 auto SaveFloat128Format = Float128Format; 409 copyAuxTarget(Aux); 410 LongDoubleFormat = SaveLongDoubleFormat; 411 Float128Format = SaveFloat128Format; 412 // For certain builtin types support on the host target, claim they are 413 // support to pass the compilation of the host code during the device-side 414 // compilation. 415 // FIXME: As the side effect, we also accept `__float128` uses in the device 416 // code. To rejct these builtin types supported in the host target but not in 417 // the device target, one approach would support `device_builtin` attribute 418 // so that we could tell the device builtin types from the host ones. The 419 // also solves the different representations of the same builtin type, such 420 // as `size_t` in the MSVC environment. 421 if (Aux->hasFloat128Type()) { 422 HasFloat128 = true; 423 Float128Format = DoubleFormat; 424 } 425 } 426