//===--- AMDGPU.cpp - Implement AMDGPU target feature support ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};

const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
} // namespace targets
} // namespace clang

const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};

const char *const AMDGPUTargetInfo::GCCRegNames[] = {
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
    "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
    "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
    "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
    "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
    "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
    "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
    "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
    "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
    "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
    "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
    "v144",
"v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 
  if (isAMDGCN(getTriple())) {
    if (CPU.empty())
      CPU = "gfx600";

    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
    case GK_GFX906:
      Features["dl-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX909:
    case GK_GFX904:
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["vi-insts"] = true;
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
      Features["ci-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX601:
    case GK_GFX600:
      break;
    case GK_NONE:
      return false;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    if (CPU.empty())
      CPU = "r600";

    switch (llvm::AMDGPU::parseArchR600(CPU)) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}

void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
                                           TargetOptions &TargetOpts) const {
  bool hasFP32Denormals = false;
  bool hasFP64Denormals = false;

  for (auto &I : TargetOpts.FeaturesAsWritten) {
    if (I == "+fp32-denormals" || I == "-fp32-denormals")
      hasFP32Denormals = true;
    if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
      hasFP64Denormals = true;
  }
  if (!hasFP32Denormals)
    TargetOpts.Features.push_back(
        (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
                   ? '+'
                   : '-') +
         Twine("fp32-denormals"))
            .str());
  // fp64 and fp16 denormals are never flushed.
  if (!hasFP64Denormals && hasFP64())
    TargetOpts.Features.push_back("+fp64-fp16-denormals");
}

void AMDGPUTargetInfo::fillValidCPUList(
    SmallVectorImpl<StringRef> &Values) const {
  if (isAMDGCN(getTriple()))
    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  else
    llvm::AMDGPU::fillValidArchListR600(Values);
}

void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
}

AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      GPUKind(isAMDGCN(Triple) ?
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
              llvm::AMDGPU::parseArchR600(Opts.CPU)),
      GPUFeatures(isAMDGCN(Triple) ?
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);
  assert(DataLayout->getAllocaAddrSpace() == Private);

  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  // Set pointer width and alignment for target address space 0.
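  // The width of an address-space-0 pointer comes from the data layout:
  // 64 bits for amdgcn (DataLayoutStringAMDGCN, "p:64:64") and 32 bits for
  // r600 (DataLayoutStringR600, "p:32:32"). With 64-bit pointers, LP64-style
  // integer types are selected below for size_t, ptrdiff_t and intptr_t.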
  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
  if (getMaxPointerWidth() == 64) {
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}

void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
  TargetInfo::adjust(Opts);
  // TODO: There are still a few places that use the default address space as
  // the private address space in OpenCL. Once those are cleaned up, Opts.OpenCL
  // can be removed from the following line.
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
                     !isAMDGCN(getTriple()));
}

ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
                                             Builtin::FirstTSBuiltin);
}

void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
  Builder.defineMacro("__AMD__");
  Builder.defineMacro("__AMDGPU__");

  if (isAMDGCN(getTriple()))
    Builder.defineMacro("__AMDGCN__");
  else
    Builder.defineMacro("__R600__");

  if (GPUKind != llvm::AMDGPU::GK_NONE) {
    StringRef CanonName = isAMDGCN(getTriple()) ?
      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
  }

  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will
  // be removed in the near future.
  if (hasFMAF())
    Builder.defineMacro("__HAS_FMAF__");
  if (hasFastFMAF())
    Builder.defineMacro("FP_FAST_FMAF");
  if (hasLDEXPF())
    Builder.defineMacro("__HAS_LDEXPF__");
  if (hasFP64())
    Builder.defineMacro("__HAS_FP64__");
  if (hasFastFMA())
    Builder.defineMacro("FP_FAST_FMA");
}