1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1033: 178 case GK_GFX1032: 179 case GK_GFX1031: 180 case GK_GFX1030: 181 Features["ci-insts"] = true; 182 Features["dot1-insts"] = true; 183 Features["dot2-insts"] = true; 184 Features["dot5-insts"] = true; 185 Features["dot6-insts"] = true; 186 Features["dl-insts"] = true; 187 Features["flat-address-space"] = true; 188 Features["16-bit-insts"] = true; 189 Features["dpp"] = true; 190 Features["gfx8-insts"] = true; 191 Features["gfx9-insts"] = true; 192 Features["gfx10-insts"] = true; 193 Features["gfx10-3-insts"] = true; 194 Features["s-memrealtime"] = true; 195 break; 196 case GK_GFX1012: 197 case GK_GFX1011: 198 Features["dot1-insts"] = true; 199 Features["dot2-insts"] = true; 200 Features["dot5-insts"] = true; 201 Features["dot6-insts"] = true; 202 LLVM_FALLTHROUGH; 203 case GK_GFX1010: 204 Features["dl-insts"] = true; 205 Features["ci-insts"] = true; 206 Features["flat-address-space"] = true; 207 Features["16-bit-insts"] = true; 208 Features["dpp"] = true; 209 Features["gfx8-insts"] = true; 210 Features["gfx9-insts"] = true; 211 Features["gfx10-insts"] = true; 212 Features["s-memrealtime"] = true; 213 Features["s-memtime-inst"] = true; 214 break; 215 case GK_GFX908: 216 Features["dot3-insts"] = true; 217 Features["dot4-insts"] = true; 218 Features["dot5-insts"] = true; 219 Features["dot6-insts"] = true; 220 Features["mai-insts"] = true; 221 LLVM_FALLTHROUGH; 222 case GK_GFX906: 223 Features["dl-insts"] = true; 224 Features["dot1-insts"] = true; 225 Features["dot2-insts"] = true; 226 LLVM_FALLTHROUGH; 227 case GK_GFX90C: 228 case GK_GFX909: 229 case GK_GFX904: 230 case GK_GFX902: 231 case GK_GFX900: 232 Features["gfx9-insts"] = true; 233 LLVM_FALLTHROUGH; 234 case GK_GFX810: 235 case GK_GFX805: 236 case GK_GFX803: 237 case GK_GFX802: 238 case GK_GFX801: 239 Features["gfx8-insts"] = true; 240 Features["16-bit-insts"] = true; 241 Features["dpp"] = true; 242 Features["s-memrealtime"] = true; 243 LLVM_FALLTHROUGH; 244 case GK_GFX705: 245 case GK_GFX704: 246 case GK_GFX703: 247 case GK_GFX702: 248 case GK_GFX701: 249 case GK_GFX700: 250 Features["ci-insts"] = true; 251 Features["flat-address-space"] = true; 252 LLVM_FALLTHROUGH; 253 case GK_GFX602: 254 case GK_GFX601: 255 case GK_GFX600: 256 Features["s-memtime-inst"] = true; 257 break; 258 case GK_NONE: 259 break; 260 default: 261 llvm_unreachable("Unhandled GPU!"); 262 } 263 } else { 264 if (CPU.empty()) 265 CPU = "r600"; 266 267 switch (llvm::AMDGPU::parseArchR600(CPU)) { 268 case GK_CAYMAN: 269 case GK_CYPRESS: 270 case GK_RV770: 271 case GK_RV670: 272 // TODO: Add fp64 when implemented. 273 break; 274 case GK_TURKS: 275 case GK_CAICOS: 276 case GK_BARTS: 277 case GK_SUMO: 278 case GK_REDWOOD: 279 case GK_JUNIPER: 280 case GK_CEDAR: 281 case GK_RV730: 282 case GK_RV710: 283 case GK_RS880: 284 case GK_R630: 285 case GK_R600: 286 break; 287 default: 288 llvm_unreachable("Unhandled GPU!"); 289 } 290 } 291 292 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 293 } 294 295 void AMDGPUTargetInfo::fillValidCPUList( 296 SmallVectorImpl<StringRef> &Values) const { 297 if (isAMDGCN(getTriple())) 298 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 299 else 300 llvm::AMDGPU::fillValidArchListR600(Values); 301 } 302 303 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 304 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 305 } 306 307 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 308 const TargetOptions &Opts) 309 : TargetInfo(Triple), 310 GPUKind(isAMDGCN(Triple) ? 311 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 312 llvm::AMDGPU::parseArchR600(Opts.CPU)), 313 GPUFeatures(isAMDGCN(Triple) ? 314 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 315 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 316 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 317 : DataLayoutStringR600); 318 assert(DataLayout->getAllocaAddrSpace() == Private); 319 GridValues = llvm::omp::AMDGPUGpuGridValues; 320 321 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 322 !isAMDGCN(Triple)); 323 UseAddrSpaceMapMangling = true; 324 325 HasLegalHalfType = true; 326 HasFloat16 = true; 327 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 328 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 329 330 // Set pointer width and alignment for target address space 0. 331 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 332 if (getMaxPointerWidth() == 64) { 333 LongWidth = LongAlign = 64; 334 SizeType = UnsignedLong; 335 PtrDiffType = SignedLong; 336 IntPtrType = SignedLong; 337 } 338 339 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 340 } 341 342 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 343 TargetInfo::adjust(Opts); 344 // ToDo: There are still a few places using default address space as private 345 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 346 // can be removed from the following line. 347 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 348 !isAMDGCN(getTriple())); 349 } 350 351 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 352 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 353 Builtin::FirstTSBuiltin); 354 } 355 356 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 357 MacroBuilder &Builder) const { 358 Builder.defineMacro("__AMD__"); 359 Builder.defineMacro("__AMDGPU__"); 360 361 if (isAMDGCN(getTriple())) 362 Builder.defineMacro("__AMDGCN__"); 363 else 364 Builder.defineMacro("__R600__"); 365 366 if (GPUKind != llvm::AMDGPU::GK_NONE) { 367 StringRef CanonName = isAMDGCN(getTriple()) ? 368 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 369 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 370 if (isAMDGCN(getTriple())) { 371 Builder.defineMacro("__amdgcn_processor__", 372 Twine("\"") + Twine(CanonName) + Twine("\"")); 373 Builder.defineMacro("__amdgcn_target_id__", 374 Twine("\"") + Twine(getTargetID().getValue()) + 375 Twine("\"")); 376 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 377 auto Loc = OffloadArchFeatures.find(F); 378 if (Loc != OffloadArchFeatures.end()) { 379 std::string NewF = F.str(); 380 std::replace(NewF.begin(), NewF.end(), '-', '_'); 381 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 382 Twine("__"), 383 Loc->second ? "1" : "0"); 384 } 385 } 386 } 387 } 388 389 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 390 // removed in the near future. 391 if (hasFMAF()) 392 Builder.defineMacro("__HAS_FMAF__"); 393 if (hasFastFMAF()) 394 Builder.defineMacro("FP_FAST_FMAF"); 395 if (hasLDEXPF()) 396 Builder.defineMacro("__HAS_LDEXPF__"); 397 if (hasFP64()) 398 Builder.defineMacro("__HAS_FP64__"); 399 if (hasFastFMA()) 400 Builder.defineMacro("FP_FAST_FMA"); 401 402 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 403 } 404 405 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 406 assert(HalfFormat == Aux->HalfFormat); 407 assert(FloatFormat == Aux->FloatFormat); 408 assert(DoubleFormat == Aux->DoubleFormat); 409 410 // On x86_64 long double is 80-bit extended precision format, which is 411 // not supported by AMDGPU. 128-bit floating point format is also not 412 // supported by AMDGPU. Therefore keep its own format for these two types. 413 auto SaveLongDoubleFormat = LongDoubleFormat; 414 auto SaveFloat128Format = Float128Format; 415 copyAuxTarget(Aux); 416 LongDoubleFormat = SaveLongDoubleFormat; 417 Float128Format = SaveFloat128Format; 418 // For certain builtin types support on the host target, claim they are 419 // support to pass the compilation of the host code during the device-side 420 // compilation. 421 // FIXME: As the side effect, we also accept `__float128` uses in the device 422 // code. To rejct these builtin types supported in the host target but not in 423 // the device target, one approach would support `device_builtin` attribute 424 // so that we could tell the device builtin types from the host ones. The 425 // also solves the different representations of the same builtin type, such 426 // as `size_t` in the MSVC environment. 427 if (Aux->hasFloat128Type()) { 428 HasFloat128 = true; 429 Float128Format = DoubleFormat; 430 } 431 } 432