1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // opencl_global_device 50 Global, // opencl_global_host 51 Global, // cuda_device 52 Constant, // cuda_constant 53 Local, // cuda_shared 54 Generic, // ptr32_sptr 55 Generic, // ptr32_uptr 56 Generic // ptr64 57 }; 58 59 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 60 Private, // Default 61 Global, // opencl_global 62 Local, // opencl_local 63 Constant, // opencl_constant 64 Private, // opencl_private 65 Generic, // opencl_generic 66 Global, // opencl_global_device 67 Global, // opencl_global_host 68 Global, // cuda_device 69 Constant, // cuda_constant 70 Local, // cuda_shared 71 Generic, // ptr32_sptr 72 Generic, // ptr32_uptr 73 Generic // ptr64 74 75 }; 76 } // namespace targets 77 } // namespace clang 78 79 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 80 #define BUILTIN(ID, TYPE, ATTRS) \ 81 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 82 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 83 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 84 #include "clang/Basic/BuiltinsAMDGPU.def" 85 }; 86 87 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 88 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 89 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 90 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 91 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 92 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 93 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 94 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 95 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 96 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 97 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 98 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 99 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 100 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 101 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 102 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 103 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 104 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 105 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 106 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 107 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 108 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 109 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 110 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 111 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 112 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 113 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 114 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 115 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 116 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 117 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 118 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 119 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 120 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 121 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 122 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 123 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 124 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 125 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 126 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 127 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 128 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 129 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 130 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 131 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 132 "flat_scratch_lo", "flat_scratch_hi", 133 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 134 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 135 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 136 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 137 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 138 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 139 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 140 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 141 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 142 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 143 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 144 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 145 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 146 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 147 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 148 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 149 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 150 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 151 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 152 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 153 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 154 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 155 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 156 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 157 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 158 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 159 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 160 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 161 "a252", "a253", "a254", "a255" 162 }; 163 164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 165 return llvm::makeArrayRef(GCCRegNames); 166 } 167 168 bool AMDGPUTargetInfo::initFeatureMap( 169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 170 const std::vector<std::string> &FeatureVec) const { 171 172 using namespace llvm::AMDGPU; 173 174 // XXX - What does the member GPU mean if device name string passed here? 175 if (isAMDGCN(getTriple())) { 176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 177 case GK_GFX1031: 178 case GK_GFX1030: 179 Features["ci-insts"] = true; 180 Features["dot1-insts"] = true; 181 Features["dot2-insts"] = true; 182 Features["dot5-insts"] = true; 183 Features["dot6-insts"] = true; 184 Features["dl-insts"] = true; 185 Features["flat-address-space"] = true; 186 Features["16-bit-insts"] = true; 187 Features["dpp"] = true; 188 Features["gfx8-insts"] = true; 189 Features["gfx9-insts"] = true; 190 Features["gfx10-insts"] = true; 191 Features["gfx10-3-insts"] = true; 192 Features["s-memrealtime"] = true; 193 break; 194 case GK_GFX1012: 195 case GK_GFX1011: 196 Features["dot1-insts"] = true; 197 Features["dot2-insts"] = true; 198 Features["dot5-insts"] = true; 199 Features["dot6-insts"] = true; 200 LLVM_FALLTHROUGH; 201 case GK_GFX1010: 202 Features["dl-insts"] = true; 203 Features["ci-insts"] = true; 204 Features["flat-address-space"] = true; 205 Features["16-bit-insts"] = true; 206 Features["dpp"] = true; 207 Features["gfx8-insts"] = true; 208 Features["gfx9-insts"] = true; 209 Features["gfx10-insts"] = true; 210 Features["s-memrealtime"] = true; 211 break; 212 case GK_GFX908: 213 Features["dot3-insts"] = true; 214 Features["dot4-insts"] = true; 215 Features["dot5-insts"] = true; 216 Features["dot6-insts"] = true; 217 Features["mai-insts"] = true; 218 LLVM_FALLTHROUGH; 219 case GK_GFX906: 220 Features["dl-insts"] = true; 221 Features["dot1-insts"] = true; 222 Features["dot2-insts"] = true; 223 LLVM_FALLTHROUGH; 224 case GK_GFX909: 225 case GK_GFX904: 226 case GK_GFX902: 227 case GK_GFX900: 228 Features["gfx9-insts"] = true; 229 LLVM_FALLTHROUGH; 230 case GK_GFX810: 231 case GK_GFX803: 232 case GK_GFX802: 233 case GK_GFX801: 234 Features["gfx8-insts"] = true; 235 Features["16-bit-insts"] = true; 236 Features["dpp"] = true; 237 Features["s-memrealtime"] = true; 238 LLVM_FALLTHROUGH; 239 case GK_GFX704: 240 case GK_GFX703: 241 case GK_GFX702: 242 case GK_GFX701: 243 case GK_GFX700: 244 Features["ci-insts"] = true; 245 Features["flat-address-space"] = true; 246 LLVM_FALLTHROUGH; 247 case GK_GFX601: 248 case GK_GFX600: 249 break; 250 case GK_NONE: 251 break; 252 default: 253 llvm_unreachable("Unhandled GPU!"); 254 } 255 } else { 256 if (CPU.empty()) 257 CPU = "r600"; 258 259 switch (llvm::AMDGPU::parseArchR600(CPU)) { 260 case GK_CAYMAN: 261 case GK_CYPRESS: 262 case GK_RV770: 263 case GK_RV670: 264 // TODO: Add fp64 when implemented. 265 break; 266 case GK_TURKS: 267 case GK_CAICOS: 268 case GK_BARTS: 269 case GK_SUMO: 270 case GK_REDWOOD: 271 case GK_JUNIPER: 272 case GK_CEDAR: 273 case GK_RV730: 274 case GK_RV710: 275 case GK_RS880: 276 case GK_R630: 277 case GK_R600: 278 break; 279 default: 280 llvm_unreachable("Unhandled GPU!"); 281 } 282 } 283 284 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 285 } 286 287 void AMDGPUTargetInfo::fillValidCPUList( 288 SmallVectorImpl<StringRef> &Values) const { 289 if (isAMDGCN(getTriple())) 290 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 291 else 292 llvm::AMDGPU::fillValidArchListR600(Values); 293 } 294 295 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 296 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 297 } 298 299 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 300 const TargetOptions &Opts) 301 : TargetInfo(Triple), 302 GPUKind(isAMDGCN(Triple) ? 303 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 304 llvm::AMDGPU::parseArchR600(Opts.CPU)), 305 GPUFeatures(isAMDGCN(Triple) ? 306 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 307 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 308 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 309 : DataLayoutStringR600); 310 assert(DataLayout->getAllocaAddrSpace() == Private); 311 GridValues = llvm::omp::AMDGPUGpuGridValues; 312 313 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 314 !isAMDGCN(Triple)); 315 UseAddrSpaceMapMangling = true; 316 317 HasLegalHalfType = true; 318 HasFloat16 = true; 319 320 // Set pointer width and alignment for target address space 0. 321 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 322 if (getMaxPointerWidth() == 64) { 323 LongWidth = LongAlign = 64; 324 SizeType = UnsignedLong; 325 PtrDiffType = SignedLong; 326 IntPtrType = SignedLong; 327 } 328 329 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 330 } 331 332 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 333 TargetInfo::adjust(Opts); 334 // ToDo: There are still a few places using default address space as private 335 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 336 // can be removed from the following line. 337 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 338 !isAMDGCN(getTriple())); 339 } 340 341 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 342 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 343 Builtin::FirstTSBuiltin); 344 } 345 346 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 347 MacroBuilder &Builder) const { 348 Builder.defineMacro("__AMD__"); 349 Builder.defineMacro("__AMDGPU__"); 350 351 if (isAMDGCN(getTriple())) 352 Builder.defineMacro("__AMDGCN__"); 353 else 354 Builder.defineMacro("__R600__"); 355 356 if (GPUKind != llvm::AMDGPU::GK_NONE) { 357 StringRef CanonName = isAMDGCN(getTriple()) ? 358 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 359 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 360 if (isAMDGCN(getTriple())) { 361 Builder.defineMacro("__amdgcn_processor__", 362 Twine("\"") + Twine(CanonName) + Twine("\"")); 363 Builder.defineMacro("__amdgcn_target_id__", 364 Twine("\"") + Twine(getTargetID().getValue()) + 365 Twine("\"")); 366 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 367 auto Loc = OffloadArchFeatures.find(F); 368 if (Loc != OffloadArchFeatures.end()) { 369 std::string NewF = F.str(); 370 std::replace(NewF.begin(), NewF.end(), '-', '_'); 371 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 372 Twine("__"), 373 Loc->second ? "1" : "0"); 374 } 375 } 376 } 377 } 378 379 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 380 // removed in the near future. 381 if (hasFMAF()) 382 Builder.defineMacro("__HAS_FMAF__"); 383 if (hasFastFMAF()) 384 Builder.defineMacro("FP_FAST_FMAF"); 385 if (hasLDEXPF()) 386 Builder.defineMacro("__HAS_LDEXPF__"); 387 if (hasFP64()) 388 Builder.defineMacro("__HAS_FP64__"); 389 if (hasFastFMA()) 390 Builder.defineMacro("FP_FAST_FMA"); 391 } 392 393 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 394 assert(HalfFormat == Aux->HalfFormat); 395 assert(FloatFormat == Aux->FloatFormat); 396 assert(DoubleFormat == Aux->DoubleFormat); 397 398 // On x86_64 long double is 80-bit extended precision format, which is 399 // not supported by AMDGPU. 128-bit floating point format is also not 400 // supported by AMDGPU. Therefore keep its own format for these two types. 401 auto SaveLongDoubleFormat = LongDoubleFormat; 402 auto SaveFloat128Format = Float128Format; 403 copyAuxTarget(Aux); 404 LongDoubleFormat = SaveLongDoubleFormat; 405 Float128Format = SaveFloat128Format; 406 // For certain builtin types support on the host target, claim they are 407 // support to pass the compilation of the host code during the device-side 408 // compilation. 409 // FIXME: As the side effect, we also accept `__float128` uses in the device 410 // code. To rejct these builtin types supported in the host target but not in 411 // the device target, one approach would support `device_builtin` attribute 412 // so that we could tell the device builtin types from the host ones. The 413 // also solves the different representations of the same builtin type, such 414 // as `size_t` in the MSVC environment. 415 if (Aux->hasFloat128Type()) { 416 HasFloat128 = true; 417 Float128Format = DoubleFormat; 418 } 419 } 420