1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 38 39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 40 Generic, // Default 41 Global, // opencl_global 42 Local, // opencl_local 43 Constant, // opencl_constant 44 Private, // opencl_private 45 Generic, // opencl_generic 46 Global, // cuda_device 47 Constant, // cuda_constant 48 Local // cuda_shared 49 }; 50 51 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 52 Private, // Default 53 Global, // opencl_global 54 Local, // opencl_local 55 Constant, // opencl_constant 56 Private, // opencl_private 57 Generic, // opencl_generic 58 Global, // cuda_device 59 Constant, // cuda_constant 60 Local // cuda_shared 61 }; 62 } // namespace targets 63 } // namespace clang 64 65 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 66 #define BUILTIN(ID, TYPE, ATTRS) \ 67 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 68 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 69 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 70 #include "clang/Basic/BuiltinsAMDGPU.def" 71 }; 72 73 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 74 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 75 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 76 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 77 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 78 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 79 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 80 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 81 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 82 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 83 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 84 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 85 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 86 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 87 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 88 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 89 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 90 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 91 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 92 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 93 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 94 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 95 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 96 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 97 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 98 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 99 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 100 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 101 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 102 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 103 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 104 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 105 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 106 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 107 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 108 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 109 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 110 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 111 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 112 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 113 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 114 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 115 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 116 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 117 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 118 "flat_scratch_lo", "flat_scratch_hi" 119 }; 120 121 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 122 return llvm::makeArrayRef(GCCRegNames); 123 } 124 125 bool AMDGPUTargetInfo::initFeatureMap( 126 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 127 const std::vector<std::string> &FeatureVec) const { 128 129 using namespace llvm::AMDGPU; 130 131 // XXX - What does the member GPU mean if device name string passed here? 132 if (isAMDGCN(getTriple())) { 133 if (CPU.empty()) 134 CPU = "gfx600"; 135 136 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 137 case GK_GFX906: 138 Features["dl-insts"] = true; 139 Features["dot1-insts"] = true; 140 Features["dot2-insts"] = true; 141 LLVM_FALLTHROUGH; 142 case GK_GFX909: 143 case GK_GFX904: 144 case GK_GFX902: 145 case GK_GFX900: 146 Features["gfx9-insts"] = true; 147 LLVM_FALLTHROUGH; 148 case GK_GFX810: 149 case GK_GFX803: 150 case GK_GFX802: 151 case GK_GFX801: 152 Features["vi-insts"] = true; 153 Features["16-bit-insts"] = true; 154 Features["dpp"] = true; 155 Features["s-memrealtime"] = true; 156 LLVM_FALLTHROUGH; 157 case GK_GFX704: 158 case GK_GFX703: 159 case GK_GFX702: 160 case GK_GFX701: 161 case GK_GFX700: 162 Features["ci-insts"] = true; 163 LLVM_FALLTHROUGH; 164 case GK_GFX601: 165 case GK_GFX600: 166 break; 167 case GK_NONE: 168 return false; 169 default: 170 llvm_unreachable("Unhandled GPU!"); 171 } 172 } else { 173 if (CPU.empty()) 174 CPU = "r600"; 175 176 switch (llvm::AMDGPU::parseArchR600(CPU)) { 177 case GK_CAYMAN: 178 case GK_CYPRESS: 179 case GK_RV770: 180 case GK_RV670: 181 // TODO: Add fp64 when implemented. 182 break; 183 case GK_TURKS: 184 case GK_CAICOS: 185 case GK_BARTS: 186 case GK_SUMO: 187 case GK_REDWOOD: 188 case GK_JUNIPER: 189 case GK_CEDAR: 190 case GK_RV730: 191 case GK_RV710: 192 case GK_RS880: 193 case GK_R630: 194 case GK_R600: 195 break; 196 default: 197 llvm_unreachable("Unhandled GPU!"); 198 } 199 } 200 201 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 202 } 203 204 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 205 TargetOptions &TargetOpts) const { 206 bool hasFP32Denormals = false; 207 bool hasFP64Denormals = false; 208 209 for (auto &I : TargetOpts.FeaturesAsWritten) { 210 if (I == "+fp32-denormals" || I == "-fp32-denormals") 211 hasFP32Denormals = true; 212 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 213 hasFP64Denormals = true; 214 } 215 if (!hasFP32Denormals) 216 TargetOpts.Features.push_back( 217 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 218 ? '+' : '-') + Twine("fp32-denormals")) 219 .str()); 220 // Always do not flush fp64 or fp16 denorms. 221 if (!hasFP64Denormals && hasFP64()) 222 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 223 } 224 225 void AMDGPUTargetInfo::fillValidCPUList( 226 SmallVectorImpl<StringRef> &Values) const { 227 if (isAMDGCN(getTriple())) 228 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 229 else 230 llvm::AMDGPU::fillValidArchListR600(Values); 231 } 232 233 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 234 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 235 } 236 237 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 238 const TargetOptions &Opts) 239 : TargetInfo(Triple), 240 GPUKind(isAMDGCN(Triple) ? 241 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 242 llvm::AMDGPU::parseArchR600(Opts.CPU)), 243 GPUFeatures(isAMDGCN(Triple) ? 244 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 245 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 246 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 247 : DataLayoutStringR600); 248 assert(DataLayout->getAllocaAddrSpace() == Private); 249 250 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 251 !isAMDGCN(Triple)); 252 UseAddrSpaceMapMangling = true; 253 254 // Set pointer width and alignment for target address space 0. 255 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 256 if (getMaxPointerWidth() == 64) { 257 LongWidth = LongAlign = 64; 258 SizeType = UnsignedLong; 259 PtrDiffType = SignedLong; 260 IntPtrType = SignedLong; 261 } 262 263 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 264 } 265 266 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 267 TargetInfo::adjust(Opts); 268 // ToDo: There are still a few places using default address space as private 269 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 270 // can be removed from the following line. 271 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 272 !isAMDGCN(getTriple())); 273 } 274 275 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 276 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 277 Builtin::FirstTSBuiltin); 278 } 279 280 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 281 MacroBuilder &Builder) const { 282 Builder.defineMacro("__AMD__"); 283 Builder.defineMacro("__AMDGPU__"); 284 285 if (isAMDGCN(getTriple())) 286 Builder.defineMacro("__AMDGCN__"); 287 else 288 Builder.defineMacro("__R600__"); 289 290 if (GPUKind != llvm::AMDGPU::GK_NONE) { 291 StringRef CanonName = isAMDGCN(getTriple()) ? 292 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 293 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 294 } 295 296 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 297 // removed in the near future. 298 if (hasFMAF()) 299 Builder.defineMacro("__HAS_FMAF__"); 300 if (hasFastFMAF()) 301 Builder.defineMacro("FP_FAST_FMAF"); 302 if (hasLDEXPF()) 303 Builder.defineMacro("__HAS_LDEXPF__"); 304 if (hasFP64()) 305 Builder.defineMacro("__HAS_FP64__"); 306 if (hasFastFMA()) 307 Builder.defineMacro("FP_FAST_FMA"); 308 } 309 310 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 311 assert(HalfFormat == Aux->HalfFormat); 312 assert(FloatFormat == Aux->FloatFormat); 313 assert(DoubleFormat == Aux->DoubleFormat); 314 315 // On x86_64 long double is 80-bit extended precision format, which is 316 // not supported by AMDGPU. 128-bit floating point format is also not 317 // supported by AMDGPU. Therefore keep its own format for these two types. 318 auto SaveLongDoubleFormat = LongDoubleFormat; 319 auto SaveFloat128Format = Float128Format; 320 copyAuxTarget(Aux); 321 LongDoubleFormat = SaveLongDoubleFormat; 322 Float128Format = SaveFloat128Format; 323 } 324