1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 135 case GK_GFX1012: 136 case GK_GFX1011: 137 Features["dot1-insts"] = true; 138 Features["dot2-insts"] = true; 139 Features["dot5-insts"] = true; 140 Features["dot6-insts"] = true; 141 LLVM_FALLTHROUGH; 142 case GK_GFX1010: 143 Features["dl-insts"] = true; 144 Features["ci-insts"] = true; 145 Features["16-bit-insts"] = true; 146 Features["dpp"] = true; 147 Features["gfx8-insts"] = true; 148 Features["gfx9-insts"] = true; 149 Features["gfx10-insts"] = true; 150 Features["s-memrealtime"] = true; 151 break; 152 case GK_GFX908: 153 Features["dot3-insts"] = true; 154 Features["dot4-insts"] = true; 155 Features["dot5-insts"] = true; 156 Features["dot6-insts"] = true; 157 LLVM_FALLTHROUGH; 158 case GK_GFX906: 159 Features["dl-insts"] = true; 160 Features["dot1-insts"] = true; 161 Features["dot2-insts"] = true; 162 LLVM_FALLTHROUGH; 163 case GK_GFX909: 164 case GK_GFX904: 165 case GK_GFX902: 166 case GK_GFX900: 167 Features["gfx9-insts"] = true; 168 LLVM_FALLTHROUGH; 169 case GK_GFX810: 170 case GK_GFX803: 171 case GK_GFX802: 172 case GK_GFX801: 173 Features["gfx8-insts"] = true; 174 Features["16-bit-insts"] = true; 175 Features["dpp"] = true; 176 Features["s-memrealtime"] = true; 177 LLVM_FALLTHROUGH; 178 case GK_GFX704: 179 case GK_GFX703: 180 case GK_GFX702: 181 case GK_GFX701: 182 case GK_GFX700: 183 Features["ci-insts"] = true; 184 LLVM_FALLTHROUGH; 185 case GK_GFX601: 186 case GK_GFX600: 187 break; 188 case GK_NONE: 189 break; 190 default: 191 llvm_unreachable("Unhandled GPU!"); 192 } 193 } else { 194 if (CPU.empty()) 195 CPU = "r600"; 196 197 switch (llvm::AMDGPU::parseArchR600(CPU)) { 198 case GK_CAYMAN: 199 case GK_CYPRESS: 200 case GK_RV770: 201 case GK_RV670: 202 // TODO: Add fp64 when implemented. 203 break; 204 case GK_TURKS: 205 case GK_CAICOS: 206 case GK_BARTS: 207 case GK_SUMO: 208 case GK_REDWOOD: 209 case GK_JUNIPER: 210 case GK_CEDAR: 211 case GK_RV730: 212 case GK_RV710: 213 case GK_RS880: 214 case GK_R630: 215 case GK_R600: 216 break; 217 default: 218 llvm_unreachable("Unhandled GPU!"); 219 } 220 } 221 222 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 223 } 224 225 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 226 TargetOptions &TargetOpts) const { 227 bool hasFP32Denormals = false; 228 bool hasFP64Denormals = false; 229 230 for (auto &I : TargetOpts.FeaturesAsWritten) { 231 if (I == "+fp32-denormals" || I == "-fp32-denormals") 232 hasFP32Denormals = true; 233 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 234 hasFP64Denormals = true; 235 } 236 if (!hasFP32Denormals) 237 TargetOpts.Features.push_back( 238 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 239 ? '+' : '-') + Twine("fp32-denormals")) 240 .str()); 241 // Always do not flush fp64 or fp16 denorms. 242 if (!hasFP64Denormals && hasFP64()) 243 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 244 } 245 246 void AMDGPUTargetInfo::fillValidCPUList( 247 SmallVectorImpl<StringRef> &Values) const { 248 if (isAMDGCN(getTriple())) 249 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 250 else 251 llvm::AMDGPU::fillValidArchListR600(Values); 252 } 253 254 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 255 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 256 } 257 258 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 259 const TargetOptions &Opts) 260 : TargetInfo(Triple), 261 GPUKind(isAMDGCN(Triple) ? 262 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 263 llvm::AMDGPU::parseArchR600(Opts.CPU)), 264 GPUFeatures(isAMDGCN(Triple) ? 265 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 266 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 267 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 268 : DataLayoutStringR600); 269 assert(DataLayout->getAllocaAddrSpace() == Private); 270 271 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 272 !isAMDGCN(Triple)); 273 UseAddrSpaceMapMangling = true; 274 275 HasLegalHalfType = true; 276 HasFloat16 = true; 277 278 // Set pointer width and alignment for target address space 0. 279 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 280 if (getMaxPointerWidth() == 64) { 281 LongWidth = LongAlign = 64; 282 SizeType = UnsignedLong; 283 PtrDiffType = SignedLong; 284 IntPtrType = SignedLong; 285 } 286 287 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 288 } 289 290 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 291 TargetInfo::adjust(Opts); 292 // ToDo: There are still a few places using default address space as private 293 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 294 // can be removed from the following line. 295 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 296 !isAMDGCN(getTriple())); 297 } 298 299 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 300 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 301 Builtin::FirstTSBuiltin); 302 } 303 304 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 305 MacroBuilder &Builder) const { 306 Builder.defineMacro("__AMD__"); 307 Builder.defineMacro("__AMDGPU__"); 308 309 if (isAMDGCN(getTriple())) 310 Builder.defineMacro("__AMDGCN__"); 311 else 312 Builder.defineMacro("__R600__"); 313 314 if (GPUKind != llvm::AMDGPU::GK_NONE) { 315 StringRef CanonName = isAMDGCN(getTriple()) ? 316 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 317 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 318 } 319 320 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 321 // removed in the near future. 322 if (hasFMAF()) 323 Builder.defineMacro("__HAS_FMAF__"); 324 if (hasFastFMAF()) 325 Builder.defineMacro("FP_FAST_FMAF"); 326 if (hasLDEXPF()) 327 Builder.defineMacro("__HAS_LDEXPF__"); 328 if (hasFP64()) 329 Builder.defineMacro("__HAS_FP64__"); 330 if (hasFastFMA()) 331 Builder.defineMacro("FP_FAST_FMA"); 332 } 333 334 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 335 assert(HalfFormat == Aux->HalfFormat); 336 assert(FloatFormat == Aux->FloatFormat); 337 assert(DoubleFormat == Aux->DoubleFormat); 338 339 // On x86_64 long double is 80-bit extended precision format, which is 340 // not supported by AMDGPU. 128-bit floating point format is also not 341 // supported by AMDGPU. Therefore keep its own format for these two types. 342 auto SaveLongDoubleFormat = LongDoubleFormat; 343 auto SaveFloat128Format = Float128Format; 344 copyAuxTarget(Aux); 345 LongDoubleFormat = SaveLongDoubleFormat; 346 Float128Format = SaveFloat128Format; 347 } 348