1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 if (CPU.empty()) 135 CPU = "gfx600"; 136 137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 138 case GK_GFX1012: 139 case GK_GFX1011: 140 Features["dot1-insts"] = true; 141 Features["dot2-insts"] = true; 142 Features["dot5-insts"] = true; 143 Features["dot6-insts"] = true; 144 LLVM_FALLTHROUGH; 145 case GK_GFX1010: 146 Features["dl-insts"] = true; 147 Features["ci-insts"] = true; 148 Features["16-bit-insts"] = true; 149 Features["dpp"] = true; 150 Features["gfx8-insts"] = true; 151 Features["gfx9-insts"] = true; 152 Features["gfx10-insts"] = true; 153 Features["s-memrealtime"] = true; 154 break; 155 case GK_GFX906: 156 Features["dl-insts"] = true; 157 Features["dot1-insts"] = true; 158 Features["dot2-insts"] = true; 159 LLVM_FALLTHROUGH; 160 case GK_GFX909: 161 case GK_GFX904: 162 case GK_GFX902: 163 case GK_GFX900: 164 Features["gfx9-insts"] = true; 165 LLVM_FALLTHROUGH; 166 case GK_GFX810: 167 case GK_GFX803: 168 case GK_GFX802: 169 case GK_GFX801: 170 Features["gfx8-insts"] = true; 171 Features["16-bit-insts"] = true; 172 Features["dpp"] = true; 173 Features["s-memrealtime"] = true; 174 LLVM_FALLTHROUGH; 175 case GK_GFX704: 176 case GK_GFX703: 177 case GK_GFX702: 178 case GK_GFX701: 179 case GK_GFX700: 180 Features["ci-insts"] = true; 181 LLVM_FALLTHROUGH; 182 case GK_GFX601: 183 case GK_GFX600: 184 break; 185 case GK_NONE: 186 return false; 187 default: 188 llvm_unreachable("Unhandled GPU!"); 189 } 190 } else { 191 if (CPU.empty()) 192 CPU = "r600"; 193 194 switch (llvm::AMDGPU::parseArchR600(CPU)) { 195 case GK_CAYMAN: 196 case GK_CYPRESS: 197 case GK_RV770: 198 case GK_RV670: 199 // TODO: Add fp64 when implemented. 200 break; 201 case GK_TURKS: 202 case GK_CAICOS: 203 case GK_BARTS: 204 case GK_SUMO: 205 case GK_REDWOOD: 206 case GK_JUNIPER: 207 case GK_CEDAR: 208 case GK_RV730: 209 case GK_RV710: 210 case GK_RS880: 211 case GK_R630: 212 case GK_R600: 213 break; 214 default: 215 llvm_unreachable("Unhandled GPU!"); 216 } 217 } 218 219 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 220 } 221 222 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 223 TargetOptions &TargetOpts) const { 224 bool hasFP32Denormals = false; 225 bool hasFP64Denormals = false; 226 227 for (auto &I : TargetOpts.FeaturesAsWritten) { 228 if (I == "+fp32-denormals" || I == "-fp32-denormals") 229 hasFP32Denormals = true; 230 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 231 hasFP64Denormals = true; 232 } 233 if (!hasFP32Denormals) 234 TargetOpts.Features.push_back( 235 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 236 ? '+' : '-') + Twine("fp32-denormals")) 237 .str()); 238 // Always do not flush fp64 or fp16 denorms. 239 if (!hasFP64Denormals && hasFP64()) 240 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 241 } 242 243 void AMDGPUTargetInfo::fillValidCPUList( 244 SmallVectorImpl<StringRef> &Values) const { 245 if (isAMDGCN(getTriple())) 246 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 247 else 248 llvm::AMDGPU::fillValidArchListR600(Values); 249 } 250 251 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 252 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 253 } 254 255 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 256 const TargetOptions &Opts) 257 : TargetInfo(Triple), 258 GPUKind(isAMDGCN(Triple) ? 259 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 260 llvm::AMDGPU::parseArchR600(Opts.CPU)), 261 GPUFeatures(isAMDGCN(Triple) ? 262 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 263 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 264 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 265 : DataLayoutStringR600); 266 assert(DataLayout->getAllocaAddrSpace() == Private); 267 268 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 269 !isAMDGCN(Triple)); 270 UseAddrSpaceMapMangling = true; 271 272 HasLegalHalfType = true; 273 HasFloat16 = true; 274 275 // Set pointer width and alignment for target address space 0. 276 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 277 if (getMaxPointerWidth() == 64) { 278 LongWidth = LongAlign = 64; 279 SizeType = UnsignedLong; 280 PtrDiffType = SignedLong; 281 IntPtrType = SignedLong; 282 } 283 284 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 285 } 286 287 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 288 TargetInfo::adjust(Opts); 289 // ToDo: There are still a few places using default address space as private 290 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 291 // can be removed from the following line. 292 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 293 !isAMDGCN(getTriple())); 294 } 295 296 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 297 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 298 Builtin::FirstTSBuiltin); 299 } 300 301 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 302 MacroBuilder &Builder) const { 303 Builder.defineMacro("__AMD__"); 304 Builder.defineMacro("__AMDGPU__"); 305 306 if (isAMDGCN(getTriple())) 307 Builder.defineMacro("__AMDGCN__"); 308 else 309 Builder.defineMacro("__R600__"); 310 311 if (GPUKind != llvm::AMDGPU::GK_NONE) { 312 StringRef CanonName = isAMDGCN(getTriple()) ? 313 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 314 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 315 } 316 317 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 318 // removed in the near future. 319 if (hasFMAF()) 320 Builder.defineMacro("__HAS_FMAF__"); 321 if (hasFastFMAF()) 322 Builder.defineMacro("FP_FAST_FMAF"); 323 if (hasLDEXPF()) 324 Builder.defineMacro("__HAS_LDEXPF__"); 325 if (hasFP64()) 326 Builder.defineMacro("__HAS_FP64__"); 327 if (hasFastFMA()) 328 Builder.defineMacro("FP_FAST_FMA"); 329 } 330 331 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 332 assert(HalfFormat == Aux->HalfFormat); 333 assert(FloatFormat == Aux->FloatFormat); 334 assert(DoubleFormat == Aux->DoubleFormat); 335 336 // On x86_64 long double is 80-bit extended precision format, which is 337 // not supported by AMDGPU. 128-bit floating point format is also not 338 // supported by AMDGPU. Therefore keep its own format for these two types. 339 auto SaveLongDoubleFormat = LongDoubleFormat; 340 auto SaveFloat128Format = Float128Format; 341 copyAuxTarget(Aux); 342 LongDoubleFormat = SaveLongDoubleFormat; 343 Float128Format = SaveFloat128Format; 344 } 345