1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1032: 178 case GK_GFX1031: 179 case GK_GFX1030: 180 Features["ci-insts"] = true; 181 Features["dot1-insts"] = true; 182 Features["dot2-insts"] = true; 183 Features["dot5-insts"] = true; 184 Features["dot6-insts"] = true; 185 Features["dl-insts"] = true; 186 Features["flat-address-space"] = true; 187 Features["16-bit-insts"] = true; 188 Features["dpp"] = true; 189 Features["gfx8-insts"] = true; 190 Features["gfx9-insts"] = true; 191 Features["gfx10-insts"] = true; 192 Features["gfx10-3-insts"] = true; 193 Features["s-memrealtime"] = true; 194 break; 195 case GK_GFX1012: 196 case GK_GFX1011: 197 Features["dot1-insts"] = true; 198 Features["dot2-insts"] = true; 199 Features["dot5-insts"] = true; 200 Features["dot6-insts"] = true; 201 LLVM_FALLTHROUGH; 202 case GK_GFX1010: 203 Features["dl-insts"] = true; 204 Features["ci-insts"] = true; 205 Features["flat-address-space"] = true; 206 Features["16-bit-insts"] = true; 207 Features["dpp"] = true; 208 Features["gfx8-insts"] = true; 209 Features["gfx9-insts"] = true; 210 Features["gfx10-insts"] = true; 211 Features["s-memrealtime"] = true; 212 break; 213 case GK_GFX908: 214 Features["dot3-insts"] = true; 215 Features["dot4-insts"] = true; 216 Features["dot5-insts"] = true; 217 Features["dot6-insts"] = true; 218 Features["mai-insts"] = true; 219 LLVM_FALLTHROUGH; 220 case GK_GFX906: 221 Features["dl-insts"] = true; 222 Features["dot1-insts"] = true; 223 Features["dot2-insts"] = true; 224 LLVM_FALLTHROUGH; 225 case GK_GFX909: 226 case GK_GFX904: 227 case GK_GFX902: 228 case GK_GFX900: 229 Features["gfx9-insts"] = true; 230 LLVM_FALLTHROUGH; 231 case GK_GFX810: 232 case GK_GFX805: 233 case GK_GFX803: 234 case GK_GFX802: 235 case GK_GFX801: 236 Features["gfx8-insts"] = true; 237 Features["16-bit-insts"] = true; 238 Features["dpp"] = true; 239 Features["s-memrealtime"] = true; 240 LLVM_FALLTHROUGH; 241 case GK_GFX705: 242 case GK_GFX704: 243 case GK_GFX703: 244 case GK_GFX702: 245 case GK_GFX701: 246 case GK_GFX700: 247 Features["ci-insts"] = true; 248 Features["flat-address-space"] = true; 249 LLVM_FALLTHROUGH; 250 case GK_GFX602: 251 case GK_GFX601: 252 case GK_GFX600: 253 break; 254 case GK_NONE: 255 break; 256 default: 257 llvm_unreachable("Unhandled GPU!"); 258 } 259 } else { 260 if (CPU.empty()) 261 CPU = "r600"; 262 263 switch (llvm::AMDGPU::parseArchR600(CPU)) { 264 case GK_CAYMAN: 265 case GK_CYPRESS: 266 case GK_RV770: 267 case GK_RV670: 268 // TODO: Add fp64 when implemented. 269 break; 270 case GK_TURKS: 271 case GK_CAICOS: 272 case GK_BARTS: 273 case GK_SUMO: 274 case GK_REDWOOD: 275 case GK_JUNIPER: 276 case GK_CEDAR: 277 case GK_RV730: 278 case GK_RV710: 279 case GK_RS880: 280 case GK_R630: 281 case GK_R600: 282 break; 283 default: 284 llvm_unreachable("Unhandled GPU!"); 285 } 286 } 287 288 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 289 } 290 291 void AMDGPUTargetInfo::fillValidCPUList( 292 SmallVectorImpl<StringRef> &Values) const { 293 if (isAMDGCN(getTriple())) 294 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 295 else 296 llvm::AMDGPU::fillValidArchListR600(Values); 297 } 298 299 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 300 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 301 } 302 303 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 304 const TargetOptions &Opts) 305 : TargetInfo(Triple), 306 GPUKind(isAMDGCN(Triple) ? 307 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 308 llvm::AMDGPU::parseArchR600(Opts.CPU)), 309 GPUFeatures(isAMDGCN(Triple) ? 310 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 311 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 312 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 313 : DataLayoutStringR600); 314 assert(DataLayout->getAllocaAddrSpace() == Private); 315 GridValues = llvm::omp::AMDGPUGpuGridValues; 316 317 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 318 !isAMDGCN(Triple)); 319 UseAddrSpaceMapMangling = true; 320 321 HasLegalHalfType = true; 322 HasFloat16 = true; 323 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; 324 325 // Set pointer width and alignment for target address space 0. 326 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 327 if (getMaxPointerWidth() == 64) { 328 LongWidth = LongAlign = 64; 329 SizeType = UnsignedLong; 330 PtrDiffType = SignedLong; 331 IntPtrType = SignedLong; 332 } 333 334 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 335 } 336 337 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 338 TargetInfo::adjust(Opts); 339 // ToDo: There are still a few places using default address space as private 340 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 341 // can be removed from the following line. 342 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 343 !isAMDGCN(getTriple())); 344 } 345 346 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 347 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 348 Builtin::FirstTSBuiltin); 349 } 350 351 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 352 MacroBuilder &Builder) const { 353 Builder.defineMacro("__AMD__"); 354 Builder.defineMacro("__AMDGPU__"); 355 356 if (isAMDGCN(getTriple())) 357 Builder.defineMacro("__AMDGCN__"); 358 else 359 Builder.defineMacro("__R600__"); 360 361 if (GPUKind != llvm::AMDGPU::GK_NONE) { 362 StringRef CanonName = isAMDGCN(getTriple()) ? 363 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 364 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 365 if (isAMDGCN(getTriple())) { 366 Builder.defineMacro("__amdgcn_processor__", 367 Twine("\"") + Twine(CanonName) + Twine("\"")); 368 Builder.defineMacro("__amdgcn_target_id__", 369 Twine("\"") + Twine(getTargetID().getValue()) + 370 Twine("\"")); 371 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 372 auto Loc = OffloadArchFeatures.find(F); 373 if (Loc != OffloadArchFeatures.end()) { 374 std::string NewF = F.str(); 375 std::replace(NewF.begin(), NewF.end(), '-', '_'); 376 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 377 Twine("__"), 378 Loc->second ? "1" : "0"); 379 } 380 } 381 } 382 } 383 384 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 385 // removed in the near future. 386 if (hasFMAF()) 387 Builder.defineMacro("__HAS_FMAF__"); 388 if (hasFastFMAF()) 389 Builder.defineMacro("FP_FAST_FMAF"); 390 if (hasLDEXPF()) 391 Builder.defineMacro("__HAS_LDEXPF__"); 392 if (hasFP64()) 393 Builder.defineMacro("__HAS_FP64__"); 394 if (hasFastFMA()) 395 Builder.defineMacro("FP_FAST_FMA"); 396 397 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); 398 } 399 400 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 401 assert(HalfFormat == Aux->HalfFormat); 402 assert(FloatFormat == Aux->FloatFormat); 403 assert(DoubleFormat == Aux->DoubleFormat); 404 405 // On x86_64 long double is 80-bit extended precision format, which is 406 // not supported by AMDGPU. 128-bit floating point format is also not 407 // supported by AMDGPU. Therefore keep its own format for these two types. 408 auto SaveLongDoubleFormat = LongDoubleFormat; 409 auto SaveFloat128Format = Float128Format; 410 copyAuxTarget(Aux); 411 LongDoubleFormat = SaveLongDoubleFormat; 412 Float128Format = SaveFloat128Format; 413 // For certain builtin types support on the host target, claim they are 414 // support to pass the compilation of the host code during the device-side 415 // compilation. 416 // FIXME: As the side effect, we also accept `__float128` uses in the device 417 // code. To rejct these builtin types supported in the host target but not in 418 // the device target, one approach would support `device_builtin` attribute 419 // so that we could tell the device builtin types from the host ones. The 420 // also solves the different representations of the same builtin type, such 421 // as `size_t` in the MSVC environment. 422 if (Aux->hasFloat128Type()) { 423 HasFloat128 = true; 424 Float128Format = DoubleFormat; 425 } 426 } 427