1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/IR/DataLayout.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // cuda_device 49 Constant, // cuda_constant 50 Local, // cuda_shared 51 Generic, // ptr32_sptr 52 Generic, // ptr32_uptr 53 Generic // ptr64 54 }; 55 56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 57 Private, // Default 58 Global, // opencl_global 59 Local, // opencl_local 60 Constant, // opencl_constant 61 Private, // opencl_private 62 Generic, // opencl_generic 63 Global, // cuda_device 64 Constant, // cuda_constant 65 Local, // cuda_shared 66 Generic, // ptr32_sptr 67 Generic, // ptr32_uptr 68 Generic // ptr64 69 70 }; 71 } // namespace targets 72 } // namespace clang 73 74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 75 #define BUILTIN(ID, TYPE, ATTRS) \ 76 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 78 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 79 #include "clang/Basic/BuiltinsAMDGPU.def" 80 }; 81 82 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 83 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 84 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 85 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 86 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 87 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 88 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 89 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 90 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 91 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 92 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 93 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 94 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 95 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 96 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 97 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 98 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 99 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 100 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 101 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 102 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 103 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 104 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 105 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 106 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 107 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 108 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 109 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 110 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 111 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 112 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 113 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 114 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 115 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 116 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 117 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 118 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 119 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 120 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 121 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 122 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 123 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 124 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 125 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 126 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 127 "flat_scratch_lo", "flat_scratch_hi" 128 }; 129 130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 131 return llvm::makeArrayRef(GCCRegNames); 132 } 133 134 bool AMDGPUTargetInfo::initFeatureMap( 135 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 136 const std::vector<std::string> &FeatureVec) const { 137 138 using namespace llvm::AMDGPU; 139 140 // XXX - What does the member GPU mean if device name string passed here? 141 if (isAMDGCN(getTriple())) { 142 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 143 case GK_GFX1012: 144 case GK_GFX1011: 145 Features["dot1-insts"] = true; 146 Features["dot2-insts"] = true; 147 Features["dot5-insts"] = true; 148 Features["dot6-insts"] = true; 149 LLVM_FALLTHROUGH; 150 case GK_GFX1010: 151 Features["dl-insts"] = true; 152 Features["ci-insts"] = true; 153 Features["flat-address-space"] = true; 154 Features["16-bit-insts"] = true; 155 Features["dpp"] = true; 156 Features["gfx8-insts"] = true; 157 Features["gfx9-insts"] = true; 158 Features["gfx10-insts"] = true; 159 Features["s-memrealtime"] = true; 160 break; 161 case GK_GFX908: 162 Features["dot3-insts"] = true; 163 Features["dot4-insts"] = true; 164 Features["dot5-insts"] = true; 165 Features["dot6-insts"] = true; 166 Features["mai-insts"] = true; 167 LLVM_FALLTHROUGH; 168 case GK_GFX906: 169 Features["dl-insts"] = true; 170 Features["dot1-insts"] = true; 171 Features["dot2-insts"] = true; 172 LLVM_FALLTHROUGH; 173 case GK_GFX909: 174 case GK_GFX904: 175 case GK_GFX902: 176 case GK_GFX900: 177 Features["gfx9-insts"] = true; 178 LLVM_FALLTHROUGH; 179 case GK_GFX810: 180 case GK_GFX803: 181 case GK_GFX802: 182 case GK_GFX801: 183 Features["gfx8-insts"] = true; 184 Features["16-bit-insts"] = true; 185 Features["dpp"] = true; 186 Features["s-memrealtime"] = true; 187 LLVM_FALLTHROUGH; 188 case GK_GFX704: 189 case GK_GFX703: 190 case GK_GFX702: 191 case GK_GFX701: 192 case GK_GFX700: 193 Features["ci-insts"] = true; 194 Features["flat-address-space"] = true; 195 LLVM_FALLTHROUGH; 196 case GK_GFX601: 197 case GK_GFX600: 198 break; 199 case GK_NONE: 200 break; 201 default: 202 llvm_unreachable("Unhandled GPU!"); 203 } 204 } else { 205 if (CPU.empty()) 206 CPU = "r600"; 207 208 switch (llvm::AMDGPU::parseArchR600(CPU)) { 209 case GK_CAYMAN: 210 case GK_CYPRESS: 211 case GK_RV770: 212 case GK_RV670: 213 // TODO: Add fp64 when implemented. 214 break; 215 case GK_TURKS: 216 case GK_CAICOS: 217 case GK_BARTS: 218 case GK_SUMO: 219 case GK_REDWOOD: 220 case GK_JUNIPER: 221 case GK_CEDAR: 222 case GK_RV730: 223 case GK_RV710: 224 case GK_RS880: 225 case GK_R630: 226 case GK_R600: 227 break; 228 default: 229 llvm_unreachable("Unhandled GPU!"); 230 } 231 } 232 233 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 234 } 235 236 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 237 TargetOptions &TargetOpts) const { 238 bool hasFP32Denormals = false; 239 bool hasFP64Denormals = false; 240 241 for (auto &I : TargetOpts.FeaturesAsWritten) { 242 if (I == "+fp32-denormals" || I == "-fp32-denormals") 243 hasFP32Denormals = true; 244 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 245 hasFP64Denormals = true; 246 } 247 if (!hasFP32Denormals) 248 TargetOpts.Features.push_back( 249 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && 250 CGOpts.FP32DenormalMode.Output == llvm::DenormalMode::IEEE 251 ? '+' : '-') + Twine("fp32-denormals")) 252 .str()); 253 // Always do not flush fp64 or fp16 denorms. 254 if (!hasFP64Denormals && hasFP64()) 255 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 256 } 257 258 void AMDGPUTargetInfo::fillValidCPUList( 259 SmallVectorImpl<StringRef> &Values) const { 260 if (isAMDGCN(getTriple())) 261 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 262 else 263 llvm::AMDGPU::fillValidArchListR600(Values); 264 } 265 266 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 267 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 268 } 269 270 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 271 const TargetOptions &Opts) 272 : TargetInfo(Triple), 273 GPUKind(isAMDGCN(Triple) ? 274 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 275 llvm::AMDGPU::parseArchR600(Opts.CPU)), 276 GPUFeatures(isAMDGCN(Triple) ? 277 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 278 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 279 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 280 : DataLayoutStringR600); 281 assert(DataLayout->getAllocaAddrSpace() == Private); 282 283 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 284 !isAMDGCN(Triple)); 285 UseAddrSpaceMapMangling = true; 286 287 HasLegalHalfType = true; 288 HasFloat16 = true; 289 290 // Set pointer width and alignment for target address space 0. 291 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 292 if (getMaxPointerWidth() == 64) { 293 LongWidth = LongAlign = 64; 294 SizeType = UnsignedLong; 295 PtrDiffType = SignedLong; 296 IntPtrType = SignedLong; 297 } 298 299 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 300 } 301 302 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 303 TargetInfo::adjust(Opts); 304 // ToDo: There are still a few places using default address space as private 305 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 306 // can be removed from the following line. 307 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 308 !isAMDGCN(getTriple())); 309 } 310 311 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 312 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 313 Builtin::FirstTSBuiltin); 314 } 315 316 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 317 MacroBuilder &Builder) const { 318 Builder.defineMacro("__AMD__"); 319 Builder.defineMacro("__AMDGPU__"); 320 321 if (isAMDGCN(getTriple())) 322 Builder.defineMacro("__AMDGCN__"); 323 else 324 Builder.defineMacro("__R600__"); 325 326 if (GPUKind != llvm::AMDGPU::GK_NONE) { 327 StringRef CanonName = isAMDGCN(getTriple()) ? 328 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 329 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 330 } 331 332 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 333 // removed in the near future. 334 if (hasFMAF()) 335 Builder.defineMacro("__HAS_FMAF__"); 336 if (hasFastFMAF()) 337 Builder.defineMacro("FP_FAST_FMAF"); 338 if (hasLDEXPF()) 339 Builder.defineMacro("__HAS_LDEXPF__"); 340 if (hasFP64()) 341 Builder.defineMacro("__HAS_FP64__"); 342 if (hasFastFMA()) 343 Builder.defineMacro("FP_FAST_FMA"); 344 } 345 346 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 347 assert(HalfFormat == Aux->HalfFormat); 348 assert(FloatFormat == Aux->FloatFormat); 349 assert(DoubleFormat == Aux->DoubleFormat); 350 351 // On x86_64 long double is 80-bit extended precision format, which is 352 // not supported by AMDGPU. 128-bit floating point format is also not 353 // supported by AMDGPU. Therefore keep its own format for these two types. 354 auto SaveLongDoubleFormat = LongDoubleFormat; 355 auto SaveFloat128Format = Float128Format; 356 copyAuxTarget(Aux); 357 LongDoubleFormat = SaveLongDoubleFormat; 358 Float128Format = SaveFloat128Format; 359 } 360