//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout description installed by the constructor for triples that are
// not AMDGCN (the R600 path).
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Data layout description installed by the constructor for AMDGCN triples.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
    "-ni:7";

// Language address space map in which the Default entry maps to Generic.
// Selected by setAddressSpaceMap(/*DefaultIsPrivate=*/false). Apart from the
// Default entry it is identical to AMDGPUDefIsPrivMap.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};

// Language address space map in which the Default entry maps to Private.
// Selected by setAddressSpaceMap(/*DefaultIsPrivate=*/true).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
} // namespace targets
} // namespace clang

// Table of target builtins, produced by expanding every entry of
// BuiltinsAMDGPU.def. BUILTIN entries store nullptr in the last field, while
// TARGET_BUILTIN entries store their FEATURE string there. The table is
// exposed through getTargetBuiltins().
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};

// Register names reported by getGCCRegNames(): v0-v255, then s0-s127, then
// exec, vcc, scc, m0, flat_scratch and the _lo/_hi names of exec, vcc and
// flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};

// Returns a view over the complete GCCRegNames table.
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  return llvm::makeArrayRef(GCCRegNames);
}

// Seeds Features with the defaults implied by CPU, then forwards to
// TargetInfo::initFeatureMap() and returns its result.
//
// For AMDGCN triples the switch is ordered so that each case group falls
// through into the groups listed below it and therefore accumulates their
// features as well. The GFX10 cases are the exception: they set their full
// list explicitly and break. For other triples an empty CPU is treated as
// "r600" and no features are added by this function.
//
// A GPU kind not listed in the relevant switch reaches llvm_unreachable;
// GK_NONE is accepted only on the AMDGCN path.
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  using namespace llvm::AMDGPU;

  // XXX - What does the member GPU mean if device name string passed here?
  if (isAMDGCN(getTriple())) {
    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    case GK_GFX1012:
    case GK_GFX1011:
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX1010:
      // GFX10 lists everything it needs here and does not fall through into
      // the older groups.
      Features["dl-insts"] = true;
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["gfx8-insts"] = true;
      Features["gfx9-insts"] = true;
      Features["gfx10-insts"] = true;
      Features["s-memrealtime"] = true;
      break;
    case GK_GFX908:
      Features["dot3-insts"] = true;
      Features["dot4-insts"] = true;
      Features["dot5-insts"] = true;
      Features["dot6-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX906:
      Features["dl-insts"] = true;
      Features["dot1-insts"] = true;
      Features["dot2-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["gfx8-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      Features["flat-address-space"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX601:
    case GK_GFX600:
      // End of the fall-through chain; these kinds add nothing of their own.
      break;
    case GK_NONE:
      // No recognized GPU kind: leave Features untouched.
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    // The defaulted name is also what gets forwarded to the base class below.
    if (CPU.empty())
      CPU = "r600";

    switch (llvm::AMDGPU::parseArchR600(CPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}

// Appends default denormal-handling features to TargetOpts.Features for any
// denormal feature that was not written explicitly in
// TargetOpts.FeaturesAsWritten.
//
// Note that the two local flags record whether the feature was *mentioned*
// (with either a '+' or a '-' prefix), not whether it is enabled.
void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
                                           TargetOptions &TargetOpts) const {
  bool hasFP32Denormals = false;
  bool hasFP64Denormals = false;

  for (auto &I : TargetOpts.FeaturesAsWritten) {
    if (I == "+fp32-denormals" || I == "-fp32-denormals")
      hasFP32Denormals = true;
    if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
      hasFP64Denormals = true;
  }
  // Default fp32 denormals to enabled only when hasFastFMAF() and
  // hasFullRateDenormalsF32() both hold and CGOpts.FlushDenorm is not set;
  // otherwise add the feature in its disabled ('-') form.
  if (!hasFP32Denormals)
    TargetOpts.Features.push_back(
        (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
                   ? '+' : '-') + Twine("fp32-denormals"))
            .str());
  // Never flush fp64 or fp16 denormals by default.
  if (!hasFP64Denormals && hasFP64())
    TargetOpts.Features.push_back("+fp64-fp16-denormals");
}

// Fills Values with the valid architecture names for this triple, using the
// AMDGCN list for AMDGCN triples and the R600 list otherwise.
void AMDGPUTargetInfo::fillValidCPUList(
    SmallVectorImpl<StringRef> &Values) const {
  if (isAMDGCN(getTriple()))
    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  else
    llvm::AMDGPU::fillValidArchListR600(Values);
}

// Points AddrSpaceMap at AMDGPUDefIsPrivMap when DefaultIsPrivate is true and
// at AMDGPUDefIsGenMap otherwise.
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
}

// Constructs the target description for Triple.
//
// GPUKind is parsed from Opts.CPU and GPUFeatures is looked up from GPUKind,
// each with the AMDGCN or R600 variant of the helper depending on the triple.
AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      GPUKind(isAMDGCN(Triple) ?
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
              llvm::AMDGPU::parseArchR600(Opts.CPU)),
      GPUFeatures(isAMDGCN(Triple) ?
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);
  // Both layout strings are expected to place allocas in the Private space.
  assert(DataLayout->getAllocaAddrSpace() == Private);

  // The default address space is private for Mesa3D and for every triple
  // that is not AMDGCN; adjust() may override this choice later.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  HasLegalHalfType = true;
  HasFloat16 = true;

  // Set pointer width and alignment for target address space 0.
  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
  // With 64-bit pointers, make long 64 bits wide and use it for the
  // size, pointer-difference and intptr types.
  if (getMaxPointerWidth() == 64) {
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}

// Applies the base-class language adjustments, then re-selects the address
// space map: the default address space is private for OpenCL and for every
// triple that is not AMDGCN.
void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
  TargetInfo::adjust(Opts);
  // ToDo: There are still a few places using default address space as private
  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  // can be removed from the following line.
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
                     !isAMDGCN(getTriple()));
}

// Returns a view over BuiltinInfo whose length is the number of builtin IDs
// between Builtin::FirstTSBuiltin and clang::AMDGPU::LastTSBuiltin.
ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
                                             Builtin::FirstTSBuiltin);
}

// Emits the target's predefined macros into Builder:
//  - __AMD__ and __AMDGPU__ unconditionally;
//  - __AMDGCN__ or __R600__ depending on the triple;
//  - __<name>__ for the canonical architecture name when GPUKind is not
//    GK_NONE;
//  - capability macros guarded by the corresponding has*() queries.
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
  Builder.defineMacro("__AMD__");
  Builder.defineMacro("__AMDGPU__");

  if (isAMDGCN(getTriple()))
    Builder.defineMacro("__AMDGCN__");
  else
    Builder.defineMacro("__R600__");

  if (GPUKind != llvm::AMDGPU::GK_NONE) {
    StringRef CanonName = isAMDGCN(getTriple()) ?
      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
  }

  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  // removed in the near future.
  if (hasFMAF())
    Builder.defineMacro("__HAS_FMAF__");
  if (hasFastFMAF())
    Builder.defineMacro("FP_FAST_FMAF");
  if (hasLDEXPF())
    Builder.defineMacro("__HAS_LDEXPF__");
  if (hasFP64())
    Builder.defineMacro("__HAS_FP64__");
  if (hasFastFMA())
    Builder.defineMacro("FP_FAST_FMA");
}

// Copies the auxiliary target's settings via copyAuxTarget() while keeping
// this target's own LongDoubleFormat and Float128Format. The half, float and
// double formats are asserted to already match Aux. Aux is dereferenced
// without a null check, so callers must pass a valid pointer.
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
  assert(HalfFormat == Aux->HalfFormat);
  assert(FloatFormat == Aux->FloatFormat);
  assert(DoubleFormat == Aux->DoubleFormat);

  // On x86_64 long double is 80-bit extended precision format, which is
  // not supported by AMDGPU. 128-bit floating point format is also not
  // supported by AMDGPU. Therefore keep its own format for these two types.
  auto SaveLongDoubleFormat = LongDoubleFormat;
  auto SaveFloat128Format = Float128Format;
  copyAuxTarget(Aux);
  LongDoubleFormat = SaveLongDoubleFormat;
  Float128Format = SaveFloat128Format;
}