1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 if (CPU.empty()) 135 CPU = "gfx600"; 136 137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 138 case GK_GFX1012: 139 case GK_GFX1011: 140 Features["dot1-insts"] = true; 141 Features["dot2-insts"] = true; 142 Features["dot5-insts"] = true; 143 Features["dot6-insts"] = true; 144 LLVM_FALLTHROUGH; 145 case GK_GFX1010: 146 Features["dl-insts"] = true; 147 Features["16-bit-insts"] = true; 148 Features["dpp"] = true; 149 Features["gfx9-insts"] = true; 150 Features["gfx10-insts"] = true; 151 Features["s-memrealtime"] = true; 152 break; 153 case GK_GFX906: 154 Features["dl-insts"] = true; 155 Features["dot1-insts"] = true; 156 Features["dot2-insts"] = true; 157 LLVM_FALLTHROUGH; 158 case GK_GFX909: 159 case GK_GFX904: 160 case GK_GFX902: 161 case GK_GFX900: 162 Features["gfx9-insts"] = true; 163 LLVM_FALLTHROUGH; 164 case GK_GFX810: 165 case GK_GFX803: 166 case GK_GFX802: 167 case GK_GFX801: 168 Features["gfx8-insts"] = true; 169 Features["16-bit-insts"] = true; 170 Features["dpp"] = true; 171 Features["s-memrealtime"] = true; 172 LLVM_FALLTHROUGH; 173 case GK_GFX704: 174 case GK_GFX703: 175 case GK_GFX702: 176 case GK_GFX701: 177 case GK_GFX700: 178 Features["ci-insts"] = true; 179 LLVM_FALLTHROUGH; 180 case GK_GFX601: 181 case GK_GFX600: 182 break; 183 case GK_NONE: 184 return false; 185 default: 186 llvm_unreachable("Unhandled GPU!"); 187 } 188 } else { 189 if (CPU.empty()) 190 CPU = "r600"; 191 192 switch (llvm::AMDGPU::parseArchR600(CPU)) { 193 case GK_CAYMAN: 194 case GK_CYPRESS: 195 case GK_RV770: 196 case GK_RV670: 197 // TODO: Add fp64 when implemented. 198 break; 199 case GK_TURKS: 200 case GK_CAICOS: 201 case GK_BARTS: 202 case GK_SUMO: 203 case GK_REDWOOD: 204 case GK_JUNIPER: 205 case GK_CEDAR: 206 case GK_RV730: 207 case GK_RV710: 208 case GK_RS880: 209 case GK_R630: 210 case GK_R600: 211 break; 212 default: 213 llvm_unreachable("Unhandled GPU!"); 214 } 215 } 216 217 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 218 } 219 220 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 221 TargetOptions &TargetOpts) const { 222 bool hasFP32Denormals = false; 223 bool hasFP64Denormals = false; 224 225 for (auto &I : TargetOpts.FeaturesAsWritten) { 226 if (I == "+fp32-denormals" || I == "-fp32-denormals") 227 hasFP32Denormals = true; 228 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 229 hasFP64Denormals = true; 230 } 231 if (!hasFP32Denormals) 232 TargetOpts.Features.push_back( 233 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 234 ? '+' : '-') + Twine("fp32-denormals")) 235 .str()); 236 // Always do not flush fp64 or fp16 denorms. 237 if (!hasFP64Denormals && hasFP64()) 238 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 239 } 240 241 void AMDGPUTargetInfo::fillValidCPUList( 242 SmallVectorImpl<StringRef> &Values) const { 243 if (isAMDGCN(getTriple())) 244 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 245 else 246 llvm::AMDGPU::fillValidArchListR600(Values); 247 } 248 249 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 250 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 251 } 252 253 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 254 const TargetOptions &Opts) 255 : TargetInfo(Triple), 256 GPUKind(isAMDGCN(Triple) ? 257 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 258 llvm::AMDGPU::parseArchR600(Opts.CPU)), 259 GPUFeatures(isAMDGCN(Triple) ? 260 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 261 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 262 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 263 : DataLayoutStringR600); 264 assert(DataLayout->getAllocaAddrSpace() == Private); 265 266 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 267 !isAMDGCN(Triple)); 268 UseAddrSpaceMapMangling = true; 269 270 HasLegalHalfType = true; 271 HasFloat16 = true; 272 273 // Set pointer width and alignment for target address space 0. 274 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 275 if (getMaxPointerWidth() == 64) { 276 LongWidth = LongAlign = 64; 277 SizeType = UnsignedLong; 278 PtrDiffType = SignedLong; 279 IntPtrType = SignedLong; 280 } 281 282 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 283 } 284 285 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 286 TargetInfo::adjust(Opts); 287 // ToDo: There are still a few places using default address space as private 288 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 289 // can be removed from the following line. 290 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 291 !isAMDGCN(getTriple())); 292 } 293 294 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 295 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 296 Builtin::FirstTSBuiltin); 297 } 298 299 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 300 MacroBuilder &Builder) const { 301 Builder.defineMacro("__AMD__"); 302 Builder.defineMacro("__AMDGPU__"); 303 304 if (isAMDGCN(getTriple())) 305 Builder.defineMacro("__AMDGCN__"); 306 else 307 Builder.defineMacro("__R600__"); 308 309 if (GPUKind != llvm::AMDGPU::GK_NONE) { 310 StringRef CanonName = isAMDGCN(getTriple()) ? 311 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 312 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 313 } 314 315 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 316 // removed in the near future. 317 if (hasFMAF()) 318 Builder.defineMacro("__HAS_FMAF__"); 319 if (hasFastFMAF()) 320 Builder.defineMacro("FP_FAST_FMAF"); 321 if (hasLDEXPF()) 322 Builder.defineMacro("__HAS_LDEXPF__"); 323 if (hasFP64()) 324 Builder.defineMacro("__HAS_FP64__"); 325 if (hasFastFMA()) 326 Builder.defineMacro("FP_FAST_FMA"); 327 } 328 329 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 330 assert(HalfFormat == Aux->HalfFormat); 331 assert(FloatFormat == Aux->FloatFormat); 332 assert(DoubleFormat == Aux->DoubleFormat); 333 334 // On x86_64 long double is 80-bit extended precision format, which is 335 // not supported by AMDGPU. 128-bit floating point format is also not 336 // supported by AMDGPU. Therefore keep its own format for these two types. 337 auto SaveLongDoubleFormat = LongDoubleFormat; 338 auto SaveFloat128Format = Float128Format; 339 copyAuxTarget(Aux); 340 LongDoubleFormat = SaveLongDoubleFormat; 341 Float128Format = SaveFloat128Format; 342 } 343