1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 if (CPU.empty()) 135 CPU = "gfx600"; 136 137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 138 case GK_GFX906: 139 Features["dl-insts"] = true; 140 Features["dot1-insts"] = true; 141 Features["dot2-insts"] = true; 142 LLVM_FALLTHROUGH; 143 case GK_GFX909: 144 case GK_GFX904: 145 case GK_GFX902: 146 case GK_GFX900: 147 Features["gfx9-insts"] = true; 148 LLVM_FALLTHROUGH; 149 case GK_GFX810: 150 case GK_GFX803: 151 case GK_GFX802: 152 case GK_GFX801: 153 Features["gfx8-insts"] = true; 154 Features["16-bit-insts"] = true; 155 Features["dpp"] = true; 156 Features["s-memrealtime"] = true; 157 LLVM_FALLTHROUGH; 158 case GK_GFX704: 159 case GK_GFX703: 160 case GK_GFX702: 161 case GK_GFX701: 162 case GK_GFX700: 163 Features["ci-insts"] = true; 164 LLVM_FALLTHROUGH; 165 case GK_GFX601: 166 case GK_GFX600: 167 break; 168 case GK_NONE: 169 return false; 170 default: 171 llvm_unreachable("Unhandled GPU!"); 172 } 173 } else { 174 if (CPU.empty()) 175 CPU = "r600"; 176 177 switch (llvm::AMDGPU::parseArchR600(CPU)) { 178 case GK_CAYMAN: 179 case GK_CYPRESS: 180 case GK_RV770: 181 case GK_RV670: 182 // TODO: Add fp64 when implemented. 183 break; 184 case GK_TURKS: 185 case GK_CAICOS: 186 case GK_BARTS: 187 case GK_SUMO: 188 case GK_REDWOOD: 189 case GK_JUNIPER: 190 case GK_CEDAR: 191 case GK_RV730: 192 case GK_RV710: 193 case GK_RS880: 194 case GK_R630: 195 case GK_R600: 196 break; 197 default: 198 llvm_unreachable("Unhandled GPU!"); 199 } 200 } 201 202 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 203 } 204 205 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 206 TargetOptions &TargetOpts) const { 207 bool hasFP32Denormals = false; 208 bool hasFP64Denormals = false; 209 210 for (auto &I : TargetOpts.FeaturesAsWritten) { 211 if (I == "+fp32-denormals" || I == "-fp32-denormals") 212 hasFP32Denormals = true; 213 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 214 hasFP64Denormals = true; 215 } 216 if (!hasFP32Denormals) 217 TargetOpts.Features.push_back( 218 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 219 ? '+' : '-') + Twine("fp32-denormals")) 220 .str()); 221 // Always do not flush fp64 or fp16 denorms. 222 if (!hasFP64Denormals && hasFP64()) 223 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 224 } 225 226 void AMDGPUTargetInfo::fillValidCPUList( 227 SmallVectorImpl<StringRef> &Values) const { 228 if (isAMDGCN(getTriple())) 229 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 230 else 231 llvm::AMDGPU::fillValidArchListR600(Values); 232 } 233 234 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 235 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 236 } 237 238 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 239 const TargetOptions &Opts) 240 : TargetInfo(Triple), 241 GPUKind(isAMDGCN(Triple) ? 242 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 243 llvm::AMDGPU::parseArchR600(Opts.CPU)), 244 GPUFeatures(isAMDGCN(Triple) ? 245 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 246 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 247 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 248 : DataLayoutStringR600); 249 assert(DataLayout->getAllocaAddrSpace() == Private); 250 251 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 252 !isAMDGCN(Triple)); 253 UseAddrSpaceMapMangling = true; 254 255 // Set pointer width and alignment for target address space 0. 256 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 257 if (getMaxPointerWidth() == 64) { 258 LongWidth = LongAlign = 64; 259 SizeType = UnsignedLong; 260 PtrDiffType = SignedLong; 261 IntPtrType = SignedLong; 262 } 263 264 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 265 } 266 267 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 268 TargetInfo::adjust(Opts); 269 // ToDo: There are still a few places using default address space as private 270 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 271 // can be removed from the following line. 272 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 273 !isAMDGCN(getTriple())); 274 } 275 276 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 277 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 278 Builtin::FirstTSBuiltin); 279 } 280 281 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 282 MacroBuilder &Builder) const { 283 Builder.defineMacro("__AMD__"); 284 Builder.defineMacro("__AMDGPU__"); 285 286 if (isAMDGCN(getTriple())) 287 Builder.defineMacro("__AMDGCN__"); 288 else 289 Builder.defineMacro("__R600__"); 290 291 if (GPUKind != llvm::AMDGPU::GK_NONE) { 292 StringRef CanonName = isAMDGCN(getTriple()) ? 293 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 294 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 295 } 296 297 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 298 // removed in the near future. 299 if (hasFMAF()) 300 Builder.defineMacro("__HAS_FMAF__"); 301 if (hasFastFMAF()) 302 Builder.defineMacro("FP_FAST_FMAF"); 303 if (hasLDEXPF()) 304 Builder.defineMacro("__HAS_LDEXPF__"); 305 if (hasFP64()) 306 Builder.defineMacro("__HAS_FP64__"); 307 if (hasFastFMA()) 308 Builder.defineMacro("FP_FAST_FMA"); 309 } 310 311 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 312 assert(HalfFormat == Aux->HalfFormat); 313 assert(FloatFormat == Aux->FloatFormat); 314 assert(DoubleFormat == Aux->DoubleFormat); 315 316 // On x86_64 long double is 80-bit extended precision format, which is 317 // not supported by AMDGPU. 128-bit floating point format is also not 318 // supported by AMDGPU. Therefore keep its own format for these two types. 319 auto SaveLongDoubleFormat = LongDoubleFormat; 320 auto SaveFloat128Format = Float128Format; 321 copyAuxTarget(Aux); 322 LongDoubleFormat = SaveLongDoubleFormat; 323 Float128Format = SaveFloat128Format; 324 } 325