1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/IR/DataLayout.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // cuda_device 49 Constant, // cuda_constant 50 Local, // cuda_shared 51 Generic, // ptr32_sptr 52 Generic, // ptr32_uptr 53 Generic // ptr64 54 }; 55 56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 57 Private, // Default 58 Global, // opencl_global 59 Local, // opencl_local 60 Constant, // opencl_constant 61 Private, // opencl_private 62 Generic, // opencl_generic 63 Global, // cuda_device 64 Constant, // cuda_constant 65 Local, // cuda_shared 66 Generic, // ptr32_sptr 67 Generic, // ptr32_uptr 68 Generic // ptr64 69 70 }; 71 } // namespace targets 72 } // namespace clang 73 74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 75 #define BUILTIN(ID, TYPE, ATTRS) \ 76 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 78 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 79 #include "clang/Basic/BuiltinsAMDGPU.def" 80 }; 81 82 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 83 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 84 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 85 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 86 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 87 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 88 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 89 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 90 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 91 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 92 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 93 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 94 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 95 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 96 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 97 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 98 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 99 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 100 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 101 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 102 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 103 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 104 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 105 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 106 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 107 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 108 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 109 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 110 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 111 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 112 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 113 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 114 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 115 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 116 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 117 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 118 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 119 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 120 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 121 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 122 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 123 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 124 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 125 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 126 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 127 "flat_scratch_lo", "flat_scratch_hi", 128 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 129 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 130 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 131 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 132 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 133 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 134 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 135 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 136 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 137 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 138 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 139 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 140 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 141 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 142 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 143 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 144 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 145 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 146 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 147 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 148 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 149 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 150 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 151 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 152 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 153 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 154 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 155 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 156 "a252", "a253", "a254", "a255" 157 }; 158 159 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 160 return llvm::makeArrayRef(GCCRegNames); 161 } 162 163 bool AMDGPUTargetInfo::initFeatureMap( 164 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 165 const std::vector<std::string> &FeatureVec) const { 166 167 using namespace llvm::AMDGPU; 168 169 // XXX - What does the member GPU mean if device name string passed here? 170 if (isAMDGCN(getTriple())) { 171 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 172 case GK_GFX1012: 173 case GK_GFX1011: 174 Features["dot1-insts"] = true; 175 Features["dot2-insts"] = true; 176 Features["dot5-insts"] = true; 177 Features["dot6-insts"] = true; 178 LLVM_FALLTHROUGH; 179 case GK_GFX1010: 180 Features["dl-insts"] = true; 181 Features["ci-insts"] = true; 182 Features["flat-address-space"] = true; 183 Features["16-bit-insts"] = true; 184 Features["dpp"] = true; 185 Features["gfx8-insts"] = true; 186 Features["gfx9-insts"] = true; 187 Features["gfx10-insts"] = true; 188 Features["s-memrealtime"] = true; 189 break; 190 case GK_GFX908: 191 Features["dot3-insts"] = true; 192 Features["dot4-insts"] = true; 193 Features["dot5-insts"] = true; 194 Features["dot6-insts"] = true; 195 Features["mai-insts"] = true; 196 LLVM_FALLTHROUGH; 197 case GK_GFX906: 198 Features["dl-insts"] = true; 199 Features["dot1-insts"] = true; 200 Features["dot2-insts"] = true; 201 LLVM_FALLTHROUGH; 202 case GK_GFX909: 203 case GK_GFX904: 204 case GK_GFX902: 205 case GK_GFX900: 206 Features["gfx9-insts"] = true; 207 LLVM_FALLTHROUGH; 208 case GK_GFX810: 209 case GK_GFX803: 210 case GK_GFX802: 211 case GK_GFX801: 212 Features["gfx8-insts"] = true; 213 Features["16-bit-insts"] = true; 214 Features["dpp"] = true; 215 Features["s-memrealtime"] = true; 216 LLVM_FALLTHROUGH; 217 case GK_GFX704: 218 case GK_GFX703: 219 case GK_GFX702: 220 case GK_GFX701: 221 case GK_GFX700: 222 Features["ci-insts"] = true; 223 Features["flat-address-space"] = true; 224 LLVM_FALLTHROUGH; 225 case GK_GFX601: 226 case GK_GFX600: 227 break; 228 case GK_NONE: 229 break; 230 default: 231 llvm_unreachable("Unhandled GPU!"); 232 } 233 } else { 234 if (CPU.empty()) 235 CPU = "r600"; 236 237 switch (llvm::AMDGPU::parseArchR600(CPU)) { 238 case GK_CAYMAN: 239 case GK_CYPRESS: 240 case GK_RV770: 241 case GK_RV670: 242 // TODO: Add fp64 when implemented. 243 break; 244 case GK_TURKS: 245 case GK_CAICOS: 246 case GK_BARTS: 247 case GK_SUMO: 248 case GK_REDWOOD: 249 case GK_JUNIPER: 250 case GK_CEDAR: 251 case GK_RV730: 252 case GK_RV710: 253 case GK_RS880: 254 case GK_R630: 255 case GK_R600: 256 break; 257 default: 258 llvm_unreachable("Unhandled GPU!"); 259 } 260 } 261 262 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 263 } 264 265 void AMDGPUTargetInfo::fillValidCPUList( 266 SmallVectorImpl<StringRef> &Values) const { 267 if (isAMDGCN(getTriple())) 268 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 269 else 270 llvm::AMDGPU::fillValidArchListR600(Values); 271 } 272 273 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 274 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 275 } 276 277 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 278 const TargetOptions &Opts) 279 : TargetInfo(Triple), 280 GPUKind(isAMDGCN(Triple) ? 281 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 282 llvm::AMDGPU::parseArchR600(Opts.CPU)), 283 GPUFeatures(isAMDGCN(Triple) ? 284 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 285 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 286 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 287 : DataLayoutStringR600); 288 assert(DataLayout->getAllocaAddrSpace() == Private); 289 290 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 291 !isAMDGCN(Triple)); 292 UseAddrSpaceMapMangling = true; 293 294 HasLegalHalfType = true; 295 HasFloat16 = true; 296 297 // Set pointer width and alignment for target address space 0. 298 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 299 if (getMaxPointerWidth() == 64) { 300 LongWidth = LongAlign = 64; 301 SizeType = UnsignedLong; 302 PtrDiffType = SignedLong; 303 IntPtrType = SignedLong; 304 } 305 306 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 307 } 308 309 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 310 TargetInfo::adjust(Opts); 311 // ToDo: There are still a few places using default address space as private 312 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 313 // can be removed from the following line. 314 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 315 !isAMDGCN(getTriple())); 316 } 317 318 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 319 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 320 Builtin::FirstTSBuiltin); 321 } 322 323 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 324 MacroBuilder &Builder) const { 325 Builder.defineMacro("__AMD__"); 326 Builder.defineMacro("__AMDGPU__"); 327 328 if (isAMDGCN(getTriple())) 329 Builder.defineMacro("__AMDGCN__"); 330 else 331 Builder.defineMacro("__R600__"); 332 333 if (GPUKind != llvm::AMDGPU::GK_NONE) { 334 StringRef CanonName = isAMDGCN(getTriple()) ? 335 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 336 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 337 } 338 339 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 340 // removed in the near future. 341 if (hasFMAF()) 342 Builder.defineMacro("__HAS_FMAF__"); 343 if (hasFastFMAF()) 344 Builder.defineMacro("FP_FAST_FMAF"); 345 if (hasLDEXPF()) 346 Builder.defineMacro("__HAS_LDEXPF__"); 347 if (hasFP64()) 348 Builder.defineMacro("__HAS_FP64__"); 349 if (hasFastFMA()) 350 Builder.defineMacro("FP_FAST_FMA"); 351 } 352 353 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 354 assert(HalfFormat == Aux->HalfFormat); 355 assert(FloatFormat == Aux->FloatFormat); 356 assert(DoubleFormat == Aux->DoubleFormat); 357 358 // On x86_64 long double is 80-bit extended precision format, which is 359 // not supported by AMDGPU. 128-bit floating point format is also not 360 // supported by AMDGPU. Therefore keep its own format for these two types. 361 auto SaveLongDoubleFormat = LongDoubleFormat; 362 auto SaveFloat128Format = Float128Format; 363 copyAuxTarget(Aux); 364 LongDoubleFormat = SaveLongDoubleFormat; 365 Float128Format = SaveFloat128Format; 366 // For certain builtin types support on the host target, claim they are 367 // support to pass the compilation of the host code during the device-side 368 // compilation. 369 // FIXME: As the side effect, we also accept `__float128` uses in the device 370 // code. To rejct these builtin types supported in the host target but not in 371 // the device target, one approach would support `device_builtin` attribute 372 // so that we could tell the device builtin types from the host ones. The 373 // also solves the different representations of the same builtin type, such 374 // as `size_t` in the MSVC environment. 375 if (Aux->hasFloat128Type()) { 376 HasFloat128 = true; 377 Float128Format = DoubleFormat; 378 } 379 } 380