1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 #include "llvm/IR/DataLayout.h" 22 23 using namespace clang; 24 using namespace clang::targets; 25 26 namespace clang { 27 namespace targets { 28 29 // If you edit the description strings, make sure you update 30 // getPointerWidthV(). 31 32 static const char *const DataLayoutStringR600 = 33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 35 36 static const char *const DataLayoutStringAMDGCN = 37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 40 "-ni:7"; 41 42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 43 Generic, // Default 44 Global, // opencl_global 45 Local, // opencl_local 46 Constant, // opencl_constant 47 Private, // opencl_private 48 Generic, // opencl_generic 49 Global, // cuda_device 50 Constant, // cuda_constant 51 Local, // cuda_shared 52 Generic, // ptr32_sptr 53 Generic, // ptr32_uptr 54 Generic // ptr64 55 }; 56 57 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 58 Private, // Default 59 Global, // opencl_global 60 Local, // opencl_local 61 Constant, // opencl_constant 62 Private, // opencl_private 63 Generic, // opencl_generic 64 Global, // cuda_device 65 Constant, // cuda_constant 66 Local, // cuda_shared 67 Generic, // ptr32_sptr 68 Generic, // ptr32_uptr 69 Generic // ptr64 70 71 }; 72 } // namespace targets 73 } // namespace clang 74 75 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 76 #define BUILTIN(ID, TYPE, ATTRS) \ 77 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 78 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 79 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 80 #include "clang/Basic/BuiltinsAMDGPU.def" 81 }; 82 83 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 84 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 85 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 86 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 87 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 88 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 89 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 90 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 91 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 92 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 93 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 94 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 95 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 96 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 97 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 98 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 99 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 100 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 101 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 102 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 103 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 104 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 105 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 106 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 107 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 108 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 109 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 110 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 111 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 112 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 113 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 114 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 115 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 116 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 117 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 118 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 119 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 120 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 121 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 122 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 123 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 124 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 125 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 126 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 127 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 128 "flat_scratch_lo", "flat_scratch_hi", 129 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 130 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 131 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 132 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 133 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 134 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 135 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 136 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 137 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 138 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 139 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 140 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 141 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 142 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 143 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 144 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 145 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 146 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 147 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 148 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 149 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 150 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 151 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 152 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 153 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 154 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 155 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 156 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 157 "a252", "a253", "a254", "a255" 158 }; 159 160 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 161 return llvm::makeArrayRef(GCCRegNames); 162 } 163 164 bool AMDGPUTargetInfo::initFeatureMap( 165 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 166 const std::vector<std::string> &FeatureVec) const { 167 168 using namespace llvm::AMDGPU; 169 170 // XXX - What does the member GPU mean if device name string passed here? 171 if (isAMDGCN(getTriple())) { 172 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 173 case GK_GFX1012: 174 case GK_GFX1011: 175 Features["dot1-insts"] = true; 176 Features["dot2-insts"] = true; 177 Features["dot5-insts"] = true; 178 Features["dot6-insts"] = true; 179 LLVM_FALLTHROUGH; 180 case GK_GFX1010: 181 Features["dl-insts"] = true; 182 Features["ci-insts"] = true; 183 Features["flat-address-space"] = true; 184 Features["16-bit-insts"] = true; 185 Features["dpp"] = true; 186 Features["gfx8-insts"] = true; 187 Features["gfx9-insts"] = true; 188 Features["gfx10-insts"] = true; 189 Features["s-memrealtime"] = true; 190 break; 191 case GK_GFX908: 192 Features["dot3-insts"] = true; 193 Features["dot4-insts"] = true; 194 Features["dot5-insts"] = true; 195 Features["dot6-insts"] = true; 196 Features["mai-insts"] = true; 197 LLVM_FALLTHROUGH; 198 case GK_GFX906: 199 Features["dl-insts"] = true; 200 Features["dot1-insts"] = true; 201 Features["dot2-insts"] = true; 202 LLVM_FALLTHROUGH; 203 case GK_GFX909: 204 case GK_GFX904: 205 case GK_GFX902: 206 case GK_GFX900: 207 Features["gfx9-insts"] = true; 208 LLVM_FALLTHROUGH; 209 case GK_GFX810: 210 case GK_GFX803: 211 case GK_GFX802: 212 case GK_GFX801: 213 Features["gfx8-insts"] = true; 214 Features["16-bit-insts"] = true; 215 Features["dpp"] = true; 216 Features["s-memrealtime"] = true; 217 LLVM_FALLTHROUGH; 218 case GK_GFX704: 219 case GK_GFX703: 220 case GK_GFX702: 221 case GK_GFX701: 222 case GK_GFX700: 223 Features["ci-insts"] = true; 224 Features["flat-address-space"] = true; 225 LLVM_FALLTHROUGH; 226 case GK_GFX601: 227 case GK_GFX600: 228 break; 229 case GK_NONE: 230 break; 231 default: 232 llvm_unreachable("Unhandled GPU!"); 233 } 234 } else { 235 if (CPU.empty()) 236 CPU = "r600"; 237 238 switch (llvm::AMDGPU::parseArchR600(CPU)) { 239 case GK_CAYMAN: 240 case GK_CYPRESS: 241 case GK_RV770: 242 case GK_RV670: 243 // TODO: Add fp64 when implemented. 244 break; 245 case GK_TURKS: 246 case GK_CAICOS: 247 case GK_BARTS: 248 case GK_SUMO: 249 case GK_REDWOOD: 250 case GK_JUNIPER: 251 case GK_CEDAR: 252 case GK_RV730: 253 case GK_RV710: 254 case GK_RS880: 255 case GK_R630: 256 case GK_R600: 257 break; 258 default: 259 llvm_unreachable("Unhandled GPU!"); 260 } 261 } 262 263 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 264 } 265 266 void AMDGPUTargetInfo::fillValidCPUList( 267 SmallVectorImpl<StringRef> &Values) const { 268 if (isAMDGCN(getTriple())) 269 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 270 else 271 llvm::AMDGPU::fillValidArchListR600(Values); 272 } 273 274 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 275 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 276 } 277 278 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 279 const TargetOptions &Opts) 280 : TargetInfo(Triple), 281 GPUKind(isAMDGCN(Triple) ? 282 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 283 llvm::AMDGPU::parseArchR600(Opts.CPU)), 284 GPUFeatures(isAMDGCN(Triple) ? 285 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 286 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 287 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 288 : DataLayoutStringR600); 289 assert(DataLayout->getAllocaAddrSpace() == Private); 290 GridValues = llvm::omp::AMDGPUGpuGridValues; 291 292 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 293 !isAMDGCN(Triple)); 294 UseAddrSpaceMapMangling = true; 295 296 HasLegalHalfType = true; 297 HasFloat16 = true; 298 299 // Set pointer width and alignment for target address space 0. 300 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 301 if (getMaxPointerWidth() == 64) { 302 LongWidth = LongAlign = 64; 303 SizeType = UnsignedLong; 304 PtrDiffType = SignedLong; 305 IntPtrType = SignedLong; 306 } 307 308 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 309 } 310 311 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 312 TargetInfo::adjust(Opts); 313 // ToDo: There are still a few places using default address space as private 314 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 315 // can be removed from the following line. 316 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 317 !isAMDGCN(getTriple())); 318 } 319 320 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 321 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 322 Builtin::FirstTSBuiltin); 323 } 324 325 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 326 MacroBuilder &Builder) const { 327 Builder.defineMacro("__AMD__"); 328 Builder.defineMacro("__AMDGPU__"); 329 330 if (isAMDGCN(getTriple())) 331 Builder.defineMacro("__AMDGCN__"); 332 else 333 Builder.defineMacro("__R600__"); 334 335 if (GPUKind != llvm::AMDGPU::GK_NONE) { 336 StringRef CanonName = isAMDGCN(getTriple()) ? 337 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 338 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 339 } 340 341 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 342 // removed in the near future. 343 if (hasFMAF()) 344 Builder.defineMacro("__HAS_FMAF__"); 345 if (hasFastFMAF()) 346 Builder.defineMacro("FP_FAST_FMAF"); 347 if (hasLDEXPF()) 348 Builder.defineMacro("__HAS_LDEXPF__"); 349 if (hasFP64()) 350 Builder.defineMacro("__HAS_FP64__"); 351 if (hasFastFMA()) 352 Builder.defineMacro("FP_FAST_FMA"); 353 } 354 355 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 356 assert(HalfFormat == Aux->HalfFormat); 357 assert(FloatFormat == Aux->FloatFormat); 358 assert(DoubleFormat == Aux->DoubleFormat); 359 360 // On x86_64 long double is 80-bit extended precision format, which is 361 // not supported by AMDGPU. 128-bit floating point format is also not 362 // supported by AMDGPU. Therefore keep its own format for these two types. 363 auto SaveLongDoubleFormat = LongDoubleFormat; 364 auto SaveFloat128Format = Float128Format; 365 copyAuxTarget(Aux); 366 LongDoubleFormat = SaveLongDoubleFormat; 367 Float128Format = SaveFloat128Format; 368 // For certain builtin types support on the host target, claim they are 369 // support to pass the compilation of the host code during the device-side 370 // compilation. 371 // FIXME: As the side effect, we also accept `__float128` uses in the device 372 // code. To rejct these builtin types supported in the host target but not in 373 // the device target, one approach would support `device_builtin` attribute 374 // so that we could tell the device builtin types from the host ones. The 375 // also solves the different representations of the same builtin type, such 376 // as `size_t` in the MSVC environment. 377 if (Aux->hasFloat128Type()) { 378 HasFloat128 = true; 379 Float128Format = DoubleFormat; 380 } 381 } 382