1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 using namespace llvm::AMDGPU; 131 132 // XXX - What does the member GPU mean if device name string passed here? 133 if (isAMDGCN(getTriple())) { 134 if (CPU.empty()) 135 CPU = "gfx600"; 136 137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 138 case GK_GFX906: 139 Features["dl-insts"] = true; 140 LLVM_FALLTHROUGH; 141 case GK_GFX904: 142 case GK_GFX902: 143 case GK_GFX900: 144 Features["gfx9-insts"] = true; 145 LLVM_FALLTHROUGH; 146 case GK_GFX810: 147 case GK_GFX803: 148 case GK_GFX802: 149 case GK_GFX801: 150 Features["vi-insts"] = true; 151 Features["16-bit-insts"] = true; 152 Features["dpp"] = true; 153 Features["s-memrealtime"] = true; 154 LLVM_FALLTHROUGH; 155 case GK_GFX704: 156 case GK_GFX703: 157 case GK_GFX702: 158 case GK_GFX701: 159 case GK_GFX700: 160 Features["ci-insts"] = true; 161 LLVM_FALLTHROUGH; 162 case GK_GFX601: 163 case GK_GFX600: 164 break; 165 case GK_NONE: 166 return false; 167 default: 168 llvm_unreachable("Unhandled GPU!"); 169 } 170 } else { 171 if (CPU.empty()) 172 CPU = "r600"; 173 174 switch (llvm::AMDGPU::parseArchR600(CPU)) { 175 case GK_CAYMAN: 176 case GK_CYPRESS: 177 case GK_RV770: 178 case GK_RV670: 179 // TODO: Add fp64 when implemented. 180 break; 181 case GK_TURKS: 182 case GK_CAICOS: 183 case GK_BARTS: 184 case GK_SUMO: 185 case GK_REDWOOD: 186 case GK_JUNIPER: 187 case GK_CEDAR: 188 case GK_RV730: 189 case GK_RV710: 190 case GK_RS880: 191 case GK_R630: 192 case GK_R600: 193 break; 194 default: 195 llvm_unreachable("Unhandled GPU!"); 196 } 197 } 198 199 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 200 } 201 202 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 203 TargetOptions &TargetOpts) const { 204 bool hasFP32Denormals = false; 205 bool hasFP64Denormals = false; 206 207 for (auto &I : TargetOpts.FeaturesAsWritten) { 208 if (I == "+fp32-denormals" || I == "-fp32-denormals") 209 hasFP32Denormals = true; 210 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 211 hasFP64Denormals = true; 212 } 213 if (!hasFP32Denormals) 214 TargetOpts.Features.push_back( 215 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 216 ? '+' : '-') + Twine("fp32-denormals")) 217 .str()); 218 // Always do not flush fp64 or fp16 denorms. 219 if (!hasFP64Denormals && hasFP64()) 220 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 221 } 222 223 void AMDGPUTargetInfo::fillValidCPUList( 224 SmallVectorImpl<StringRef> &Values) const { 225 if (isAMDGCN(getTriple())) 226 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 227 else 228 llvm::AMDGPU::fillValidArchListR600(Values); 229 } 230 231 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 232 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 233 } 234 235 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 236 const TargetOptions &Opts) 237 : TargetInfo(Triple), 238 GPUKind(isAMDGCN(Triple) ? 239 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 240 llvm::AMDGPU::parseArchR600(Opts.CPU)), 241 GPUFeatures(isAMDGCN(Triple) ? 242 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 243 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 244 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 245 : DataLayoutStringR600); 246 assert(DataLayout->getAllocaAddrSpace() == Private); 247 248 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 249 !isAMDGCN(Triple)); 250 UseAddrSpaceMapMangling = true; 251 252 // Set pointer width and alignment for target address space 0. 253 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 254 if (getMaxPointerWidth() == 64) { 255 LongWidth = LongAlign = 64; 256 SizeType = UnsignedLong; 257 PtrDiffType = SignedLong; 258 IntPtrType = SignedLong; 259 } 260 261 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 262 } 263 264 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 265 TargetInfo::adjust(Opts); 266 // ToDo: There are still a few places using default address space as private 267 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 268 // can be removed from the following line. 269 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 270 !isAMDGCN(getTriple())); 271 } 272 273 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 274 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 275 Builtin::FirstTSBuiltin); 276 } 277 278 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 279 MacroBuilder &Builder) const { 280 Builder.defineMacro("__AMD__"); 281 Builder.defineMacro("__AMDGPU__"); 282 283 if (isAMDGCN(getTriple())) 284 Builder.defineMacro("__AMDGCN__"); 285 else 286 Builder.defineMacro("__R600__"); 287 288 if (GPUKind != llvm::AMDGPU::GK_NONE) { 289 StringRef CanonName = isAMDGCN(getTriple()) ? 290 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 291 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 292 } 293 294 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 295 // removed in the near future. 296 if (hasFMAF()) 297 Builder.defineMacro("__HAS_FMAF__"); 298 if (hasFastFMAF()) 299 Builder.defineMacro("FP_FAST_FMAF"); 300 if (hasLDEXPF()) 301 Builder.defineMacro("__HAS_LDEXPF__"); 302 if (hasFP64()) 303 Builder.defineMacro("__HAS_FP64__"); 304 if (hasFastFMA()) 305 Builder.defineMacro("FP_FAST_FMA"); 306 } 307