1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 38 39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 40 Generic, // Default 41 Global, // opencl_global 42 Local, // opencl_local 43 Constant, // opencl_constant 44 Private, // opencl_private 45 Generic, // opencl_generic 46 Global, // cuda_device 47 Constant, // cuda_constant 48 Local // cuda_shared 49 }; 50 51 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 52 Private, // Default 53 Global, // opencl_global 54 Local, // opencl_local 55 Constant, // opencl_constant 56 Private, // opencl_private 57 Generic, // opencl_generic 58 Global, // cuda_device 59 Constant, // cuda_constant 60 Local // cuda_shared 61 }; 62 } // namespace targets 63 } // namespace clang 64 65 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 66 #define BUILTIN(ID, TYPE, ATTRS) \ 67 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 68 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 69 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 70 #include "clang/Basic/BuiltinsAMDGPU.def" 71 }; 72 73 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 74 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 75 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 76 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 77 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 78 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 79 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 80 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 81 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 82 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 83 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 84 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 85 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 86 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 87 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 88 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 89 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 90 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 91 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 92 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 93 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 94 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 95 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 96 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 97 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 98 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 99 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 100 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 101 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 102 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 103 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 104 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 105 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 106 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 107 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 108 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 109 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 110 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 111 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 112 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 113 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 114 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 115 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 116 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 117 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 118 "flat_scratch_lo", "flat_scratch_hi" 119 }; 120 121 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 122 return llvm::makeArrayRef(GCCRegNames); 123 } 124 125 bool AMDGPUTargetInfo::initFeatureMap( 126 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 127 const std::vector<std::string> &FeatureVec) const { 128 129 using namespace llvm::AMDGPU; 130 131 // XXX - What does the member GPU mean if device name string passed here? 132 if (isAMDGCN(getTriple())) { 133 if (CPU.empty()) 134 CPU = "gfx600"; 135 136 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 137 case GK_GFX906: 138 Features["dl-insts"] = true; 139 Features["dot-insts"] = true; 140 LLVM_FALLTHROUGH; 141 case GK_GFX909: 142 case GK_GFX904: 143 case GK_GFX902: 144 case GK_GFX900: 145 Features["gfx9-insts"] = true; 146 LLVM_FALLTHROUGH; 147 case GK_GFX810: 148 case GK_GFX803: 149 case GK_GFX802: 150 case GK_GFX801: 151 Features["vi-insts"] = true; 152 Features["16-bit-insts"] = true; 153 Features["dpp"] = true; 154 Features["s-memrealtime"] = true; 155 LLVM_FALLTHROUGH; 156 case GK_GFX704: 157 case GK_GFX703: 158 case GK_GFX702: 159 case GK_GFX701: 160 case GK_GFX700: 161 Features["ci-insts"] = true; 162 LLVM_FALLTHROUGH; 163 case GK_GFX601: 164 case GK_GFX600: 165 break; 166 case GK_NONE: 167 return false; 168 default: 169 llvm_unreachable("Unhandled GPU!"); 170 } 171 } else { 172 if (CPU.empty()) 173 CPU = "r600"; 174 175 switch (llvm::AMDGPU::parseArchR600(CPU)) { 176 case GK_CAYMAN: 177 case GK_CYPRESS: 178 case GK_RV770: 179 case GK_RV670: 180 // TODO: Add fp64 when implemented. 181 break; 182 case GK_TURKS: 183 case GK_CAICOS: 184 case GK_BARTS: 185 case GK_SUMO: 186 case GK_REDWOOD: 187 case GK_JUNIPER: 188 case GK_CEDAR: 189 case GK_RV730: 190 case GK_RV710: 191 case GK_RS880: 192 case GK_R630: 193 case GK_R600: 194 break; 195 default: 196 llvm_unreachable("Unhandled GPU!"); 197 } 198 } 199 200 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 201 } 202 203 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 204 TargetOptions &TargetOpts) const { 205 bool hasFP32Denormals = false; 206 bool hasFP64Denormals = false; 207 208 for (auto &I : TargetOpts.FeaturesAsWritten) { 209 if (I == "+fp32-denormals" || I == "-fp32-denormals") 210 hasFP32Denormals = true; 211 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 212 hasFP64Denormals = true; 213 } 214 if (!hasFP32Denormals) 215 TargetOpts.Features.push_back( 216 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 217 ? '+' : '-') + Twine("fp32-denormals")) 218 .str()); 219 // Always do not flush fp64 or fp16 denorms. 220 if (!hasFP64Denormals && hasFP64()) 221 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 222 } 223 224 void AMDGPUTargetInfo::fillValidCPUList( 225 SmallVectorImpl<StringRef> &Values) const { 226 if (isAMDGCN(getTriple())) 227 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 228 else 229 llvm::AMDGPU::fillValidArchListR600(Values); 230 } 231 232 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 233 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 234 } 235 236 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 237 const TargetOptions &Opts) 238 : TargetInfo(Triple), 239 GPUKind(isAMDGCN(Triple) ? 240 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 241 llvm::AMDGPU::parseArchR600(Opts.CPU)), 242 GPUFeatures(isAMDGCN(Triple) ? 243 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 244 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 245 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 246 : DataLayoutStringR600); 247 assert(DataLayout->getAllocaAddrSpace() == Private); 248 249 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 250 !isAMDGCN(Triple)); 251 UseAddrSpaceMapMangling = true; 252 253 // Set pointer width and alignment for target address space 0. 254 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 255 if (getMaxPointerWidth() == 64) { 256 LongWidth = LongAlign = 64; 257 SizeType = UnsignedLong; 258 PtrDiffType = SignedLong; 259 IntPtrType = SignedLong; 260 } 261 262 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 263 } 264 265 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 266 TargetInfo::adjust(Opts); 267 // ToDo: There are still a few places using default address space as private 268 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 269 // can be removed from the following line. 270 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 271 !isAMDGCN(getTriple())); 272 } 273 274 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 275 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 276 Builtin::FirstTSBuiltin); 277 } 278 279 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 280 MacroBuilder &Builder) const { 281 Builder.defineMacro("__AMD__"); 282 Builder.defineMacro("__AMDGPU__"); 283 284 if (isAMDGCN(getTriple())) 285 Builder.defineMacro("__AMDGCN__"); 286 else 287 Builder.defineMacro("__R600__"); 288 289 if (GPUKind != llvm::AMDGPU::GK_NONE) { 290 StringRef CanonName = isAMDGCN(getTriple()) ? 291 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 292 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 293 } 294 295 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 296 // removed in the near future. 297 if (hasFMAF()) 298 Builder.defineMacro("__HAS_FMAF__"); 299 if (hasFastFMAF()) 300 Builder.defineMacro("FP_FAST_FMAF"); 301 if (hasLDEXPF()) 302 Builder.defineMacro("__HAS_LDEXPF__"); 303 if (hasFP64()) 304 Builder.defineMacro("__HAS_FP64__"); 305 if (hasFastFMA()) 306 Builder.defineMacro("FP_FAST_FMA"); 307 } 308 309 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 310 assert(HalfFormat == Aux->HalfFormat); 311 assert(FloatFormat == Aux->FloatFormat); 312 assert(DoubleFormat == Aux->DoubleFormat); 313 314 // On x86_64 long double is 80-bit extended precision format, which is 315 // not supported by AMDGPU. 128-bit floating point format is also not 316 // supported by AMDGPU. Therefore keep its own format for these two types. 317 auto SaveLongDoubleFormat = LongDoubleFormat; 318 auto SaveFloat128Format = Float128Format; 319 copyAuxTarget(Aux); 320 LongDoubleFormat = SaveLongDoubleFormat; 321 Float128Format = SaveFloat128Format; 322 } 323