1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/IR/DataLayout.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 39 "-ni:7"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 Generic, // Default 43 Global, // opencl_global 44 Local, // opencl_local 45 Constant, // opencl_constant 46 Private, // opencl_private 47 Generic, // opencl_generic 48 Global, // cuda_device 49 Constant, // cuda_constant 50 Local, // cuda_shared 51 Generic, // ptr32_sptr 52 Generic, // ptr32_uptr 53 Generic // ptr64 54 }; 55 56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 57 Private, // Default 58 Global, // opencl_global 59 Local, // opencl_local 60 Constant, // opencl_constant 61 Private, // opencl_private 62 Generic, // opencl_generic 63 Global, // cuda_device 64 Constant, // cuda_constant 65 Local, // cuda_shared 66 Generic, // ptr32_sptr 67 Generic, // ptr32_uptr 68 Generic // ptr64 69 70 }; 71 } // namespace targets 72 } // namespace clang 73 74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 75 #define BUILTIN(ID, TYPE, ATTRS) \ 76 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 78 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 79 #include "clang/Basic/BuiltinsAMDGPU.def" 80 }; 81 82 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 83 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 84 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 85 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 86 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 87 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 88 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 89 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 90 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 91 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 92 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 93 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 94 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 95 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 96 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 97 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 98 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 99 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 100 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 101 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 102 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 103 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 104 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 105 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 106 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 107 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 108 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 109 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 110 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 111 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 112 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 113 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 114 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 115 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 116 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 117 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 118 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 119 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 120 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 121 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 122 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 123 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 124 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 125 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 126 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 127 "flat_scratch_lo", "flat_scratch_hi" 128 }; 129 130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 131 return llvm::makeArrayRef(GCCRegNames); 132 } 133 134 bool AMDGPUTargetInfo::initFeatureMap( 135 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 136 const std::vector<std::string> &FeatureVec) const { 137 138 using namespace llvm::AMDGPU; 139 140 // XXX - What does the member GPU mean if device name string passed here? 141 if (isAMDGCN(getTriple())) { 142 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 143 case GK_GFX1012: 144 case GK_GFX1011: 145 Features["dot1-insts"] = true; 146 Features["dot2-insts"] = true; 147 Features["dot5-insts"] = true; 148 Features["dot6-insts"] = true; 149 LLVM_FALLTHROUGH; 150 case GK_GFX1010: 151 Features["dl-insts"] = true; 152 Features["ci-insts"] = true; 153 Features["flat-address-space"] = true; 154 Features["16-bit-insts"] = true; 155 Features["dpp"] = true; 156 Features["gfx8-insts"] = true; 157 Features["gfx9-insts"] = true; 158 Features["gfx10-insts"] = true; 159 Features["s-memrealtime"] = true; 160 break; 161 case GK_GFX908: 162 Features["dot3-insts"] = true; 163 Features["dot4-insts"] = true; 164 Features["dot5-insts"] = true; 165 Features["dot6-insts"] = true; 166 LLVM_FALLTHROUGH; 167 case GK_GFX906: 168 Features["dl-insts"] = true; 169 Features["dot1-insts"] = true; 170 Features["dot2-insts"] = true; 171 LLVM_FALLTHROUGH; 172 case GK_GFX909: 173 case GK_GFX904: 174 case GK_GFX902: 175 case GK_GFX900: 176 Features["gfx9-insts"] = true; 177 LLVM_FALLTHROUGH; 178 case GK_GFX810: 179 case GK_GFX803: 180 case GK_GFX802: 181 case GK_GFX801: 182 Features["gfx8-insts"] = true; 183 Features["16-bit-insts"] = true; 184 Features["dpp"] = true; 185 Features["s-memrealtime"] = true; 186 LLVM_FALLTHROUGH; 187 case GK_GFX704: 188 case GK_GFX703: 189 case GK_GFX702: 190 case GK_GFX701: 191 case GK_GFX700: 192 Features["ci-insts"] = true; 193 Features["flat-address-space"] = true; 194 LLVM_FALLTHROUGH; 195 case GK_GFX601: 196 case GK_GFX600: 197 break; 198 case GK_NONE: 199 break; 200 default: 201 llvm_unreachable("Unhandled GPU!"); 202 } 203 } else { 204 if (CPU.empty()) 205 CPU = "r600"; 206 207 switch (llvm::AMDGPU::parseArchR600(CPU)) { 208 case GK_CAYMAN: 209 case GK_CYPRESS: 210 case GK_RV770: 211 case GK_RV670: 212 // TODO: Add fp64 when implemented. 213 break; 214 case GK_TURKS: 215 case GK_CAICOS: 216 case GK_BARTS: 217 case GK_SUMO: 218 case GK_REDWOOD: 219 case GK_JUNIPER: 220 case GK_CEDAR: 221 case GK_RV730: 222 case GK_RV710: 223 case GK_RS880: 224 case GK_R630: 225 case GK_R600: 226 break; 227 default: 228 llvm_unreachable("Unhandled GPU!"); 229 } 230 } 231 232 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 233 } 234 235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 236 TargetOptions &TargetOpts) const { 237 bool hasFP32Denormals = false; 238 bool hasFP64Denormals = false; 239 240 for (auto &I : TargetOpts.FeaturesAsWritten) { 241 if (I == "+fp32-denormals" || I == "-fp32-denormals") 242 hasFP32Denormals = true; 243 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 244 hasFP64Denormals = true; 245 } 246 if (!hasFP32Denormals) 247 TargetOpts.Features.push_back( 248 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && 249 CGOpts.FP32DenormalMode == llvm::DenormalMode::IEEE 250 ? '+' : '-') + Twine("fp32-denormals")) 251 .str()); 252 // Always do not flush fp64 or fp16 denorms. 253 if (!hasFP64Denormals && hasFP64()) 254 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 255 } 256 257 void AMDGPUTargetInfo::fillValidCPUList( 258 SmallVectorImpl<StringRef> &Values) const { 259 if (isAMDGCN(getTriple())) 260 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 261 else 262 llvm::AMDGPU::fillValidArchListR600(Values); 263 } 264 265 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 266 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 267 } 268 269 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 270 const TargetOptions &Opts) 271 : TargetInfo(Triple), 272 GPUKind(isAMDGCN(Triple) ? 273 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 274 llvm::AMDGPU::parseArchR600(Opts.CPU)), 275 GPUFeatures(isAMDGCN(Triple) ? 276 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 277 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 278 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 279 : DataLayoutStringR600); 280 assert(DataLayout->getAllocaAddrSpace() == Private); 281 282 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 283 !isAMDGCN(Triple)); 284 UseAddrSpaceMapMangling = true; 285 286 HasLegalHalfType = true; 287 HasFloat16 = true; 288 289 // Set pointer width and alignment for target address space 0. 290 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 291 if (getMaxPointerWidth() == 64) { 292 LongWidth = LongAlign = 64; 293 SizeType = UnsignedLong; 294 PtrDiffType = SignedLong; 295 IntPtrType = SignedLong; 296 } 297 298 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 299 } 300 301 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 302 TargetInfo::adjust(Opts); 303 // ToDo: There are still a few places using default address space as private 304 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 305 // can be removed from the following line. 306 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 307 !isAMDGCN(getTriple())); 308 } 309 310 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 311 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 312 Builtin::FirstTSBuiltin); 313 } 314 315 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 316 MacroBuilder &Builder) const { 317 Builder.defineMacro("__AMD__"); 318 Builder.defineMacro("__AMDGPU__"); 319 320 if (isAMDGCN(getTriple())) 321 Builder.defineMacro("__AMDGCN__"); 322 else 323 Builder.defineMacro("__R600__"); 324 325 if (GPUKind != llvm::AMDGPU::GK_NONE) { 326 StringRef CanonName = isAMDGCN(getTriple()) ? 327 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 328 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 329 } 330 331 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 332 // removed in the near future. 333 if (hasFMAF()) 334 Builder.defineMacro("__HAS_FMAF__"); 335 if (hasFastFMAF()) 336 Builder.defineMacro("FP_FAST_FMAF"); 337 if (hasLDEXPF()) 338 Builder.defineMacro("__HAS_LDEXPF__"); 339 if (hasFP64()) 340 Builder.defineMacro("__HAS_FP64__"); 341 if (hasFastFMA()) 342 Builder.defineMacro("FP_FAST_FMA"); 343 } 344 345 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 346 assert(HalfFormat == Aux->HalfFormat); 347 assert(FloatFormat == Aux->FloatFormat); 348 assert(DoubleFormat == Aux->DoubleFormat); 349 350 // On x86_64 long double is 80-bit extended precision format, which is 351 // not supported by AMDGPU. 128-bit floating point format is also not 352 // supported by AMDGPU. Therefore keep its own format for these two types. 353 auto SaveLongDoubleFormat = LongDoubleFormat; 354 auto SaveFloat128Format = Float128Format; 355 copyAuxTarget(Aux); 356 LongDoubleFormat = SaveLongDoubleFormat; 357 Float128Format = SaveFloat128Format; 358 } 359