1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 37 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 38 "-ni:7"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 if (CPU.empty()) 135 CPU = "gfx600"; 136 137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 138 case GK_GFX1010: 139 Features["dl-insts"] = true; 140 Features["16-bit-insts"] = true; 141 Features["dpp"] = true; 142 Features["gfx9-insts"] = true; 143 Features["gfx10-insts"] = true; 144 Features["s-memrealtime"] = true; 145 break; 146 case GK_GFX906: 147 Features["dl-insts"] = true; 148 Features["dot1-insts"] = true; 149 Features["dot2-insts"] = true; 150 LLVM_FALLTHROUGH; 151 case GK_GFX909: 152 case GK_GFX904: 153 case GK_GFX902: 154 case GK_GFX900: 155 Features["gfx9-insts"] = true; 156 LLVM_FALLTHROUGH; 157 case GK_GFX810: 158 case GK_GFX803: 159 case GK_GFX802: 160 case GK_GFX801: 161 Features["gfx8-insts"] = true; 162 Features["16-bit-insts"] = true; 163 Features["dpp"] = true; 164 Features["s-memrealtime"] = true; 165 LLVM_FALLTHROUGH; 166 case GK_GFX704: 167 case GK_GFX703: 168 case GK_GFX702: 169 case GK_GFX701: 170 case GK_GFX700: 171 Features["ci-insts"] = true; 172 LLVM_FALLTHROUGH; 173 case GK_GFX601: 174 case GK_GFX600: 175 break; 176 case GK_NONE: 177 return false; 178 default: 179 llvm_unreachable("Unhandled GPU!"); 180 } 181 } else { 182 if (CPU.empty()) 183 CPU = "r600"; 184 185 switch (llvm::AMDGPU::parseArchR600(CPU)) { 186 case GK_CAYMAN: 187 case GK_CYPRESS: 188 case GK_RV770: 189 case GK_RV670: 190 // TODO: Add fp64 when implemented. 191 break; 192 case GK_TURKS: 193 case GK_CAICOS: 194 case GK_BARTS: 195 case GK_SUMO: 196 case GK_REDWOOD: 197 case GK_JUNIPER: 198 case GK_CEDAR: 199 case GK_RV730: 200 case GK_RV710: 201 case GK_RS880: 202 case GK_R630: 203 case GK_R600: 204 break; 205 default: 206 llvm_unreachable("Unhandled GPU!"); 207 } 208 } 209 210 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 211 } 212 213 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 214 TargetOptions &TargetOpts) const { 215 bool hasFP32Denormals = false; 216 bool hasFP64Denormals = false; 217 218 for (auto &I : TargetOpts.FeaturesAsWritten) { 219 if (I == "+fp32-denormals" || I == "-fp32-denormals") 220 hasFP32Denormals = true; 221 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 222 hasFP64Denormals = true; 223 } 224 if (!hasFP32Denormals) 225 TargetOpts.Features.push_back( 226 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 227 ? '+' : '-') + Twine("fp32-denormals")) 228 .str()); 229 // Always do not flush fp64 or fp16 denorms. 230 if (!hasFP64Denormals && hasFP64()) 231 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 232 } 233 234 void AMDGPUTargetInfo::fillValidCPUList( 235 SmallVectorImpl<StringRef> &Values) const { 236 if (isAMDGCN(getTriple())) 237 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 238 else 239 llvm::AMDGPU::fillValidArchListR600(Values); 240 } 241 242 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 243 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 244 } 245 246 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 247 const TargetOptions &Opts) 248 : TargetInfo(Triple), 249 GPUKind(isAMDGCN(Triple) ? 250 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 251 llvm::AMDGPU::parseArchR600(Opts.CPU)), 252 GPUFeatures(isAMDGCN(Triple) ? 253 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 254 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 255 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 256 : DataLayoutStringR600); 257 assert(DataLayout->getAllocaAddrSpace() == Private); 258 259 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 260 !isAMDGCN(Triple)); 261 UseAddrSpaceMapMangling = true; 262 263 HasLegalHalfType = true; 264 HasFloat16 = true; 265 266 // Set pointer width and alignment for target address space 0. 267 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 268 if (getMaxPointerWidth() == 64) { 269 LongWidth = LongAlign = 64; 270 SizeType = UnsignedLong; 271 PtrDiffType = SignedLong; 272 IntPtrType = SignedLong; 273 } 274 275 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 276 } 277 278 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 279 TargetInfo::adjust(Opts); 280 // ToDo: There are still a few places using default address space as private 281 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 282 // can be removed from the following line. 283 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 284 !isAMDGCN(getTriple())); 285 } 286 287 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 288 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 289 Builtin::FirstTSBuiltin); 290 } 291 292 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 293 MacroBuilder &Builder) const { 294 Builder.defineMacro("__AMD__"); 295 Builder.defineMacro("__AMDGPU__"); 296 297 if (isAMDGCN(getTriple())) 298 Builder.defineMacro("__AMDGCN__"); 299 else 300 Builder.defineMacro("__R600__"); 301 302 if (GPUKind != llvm::AMDGPU::GK_NONE) { 303 StringRef CanonName = isAMDGCN(getTriple()) ? 304 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 305 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 306 } 307 308 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 309 // removed in the near future. 310 if (hasFMAF()) 311 Builder.defineMacro("__HAS_FMAF__"); 312 if (hasFastFMAF()) 313 Builder.defineMacro("FP_FAST_FMAF"); 314 if (hasLDEXPF()) 315 Builder.defineMacro("__HAS_LDEXPF__"); 316 if (hasFP64()) 317 Builder.defineMacro("__HAS_FP64__"); 318 if (hasFastFMA()) 319 Builder.defineMacro("FP_FAST_FMA"); 320 } 321 322 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 323 assert(HalfFormat == Aux->HalfFormat); 324 assert(FloatFormat == Aux->FloatFormat); 325 assert(DoubleFormat == Aux->DoubleFormat); 326 327 // On x86_64 long double is 80-bit extended precision format, which is 328 // not supported by AMDGPU. 128-bit floating point format is also not 329 // supported by AMDGPU. Therefore keep its own format for these two types. 330 auto SaveLongDoubleFormat = LongDoubleFormat; 331 auto SaveFloat128Format = Float128Format; 332 copyAuxTarget(Aux); 333 LongDoubleFormat = SaveLongDoubleFormat; 334 Float128Format = SaveFloat128Format; 335 } 336