1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 // XXX - What does the member GPU mean if device name string passed here? 131 if (isAMDGCN(getTriple())) { 132 if (CPU.empty()) 133 CPU = "gfx600"; 134 135 switch (parseAMDGCNName(CPU).Kind) { 136 case GK_GFX906: 137 Features["dl-insts"] = true; 138 LLVM_FALLTHROUGH; 139 case GK_GFX904: 140 case GK_GFX902: 141 case GK_GFX900: 142 Features["gfx9-insts"] = true; 143 LLVM_FALLTHROUGH; 144 case GK_GFX810: 145 case GK_GFX803: 146 case GK_GFX802: 147 case GK_GFX801: 148 Features["vi-insts"] = true; 149 Features["16-bit-insts"] = true; 150 Features["dpp"] = true; 151 Features["s-memrealtime"] = true; 152 LLVM_FALLTHROUGH; 153 case GK_GFX704: 154 case GK_GFX703: 155 case GK_GFX702: 156 case GK_GFX701: 157 case GK_GFX700: 158 Features["ci-insts"] = true; 159 LLVM_FALLTHROUGH; 160 case GK_GFX601: 161 case GK_GFX600: 162 break; 163 case GK_NONE: 164 return false; 165 default: 166 llvm_unreachable("Unhandled GPU!"); 167 } 168 } else { 169 if (CPU.empty()) 170 CPU = "r600"; 171 172 switch (parseR600Name(CPU).Kind) { 173 case GK_CAYMAN: 174 case GK_CYPRESS: 175 case GK_RV770: 176 case GK_RV670: 177 // TODO: Add fp64 when implemented. 178 break; 179 case GK_TURKS: 180 case GK_CAICOS: 181 case GK_BARTS: 182 case GK_SUMO: 183 case GK_REDWOOD: 184 case GK_JUNIPER: 185 case GK_CEDAR: 186 case GK_RV730: 187 case GK_RV710: 188 case GK_RS880: 189 case GK_R630: 190 case GK_R600: 191 break; 192 default: 193 llvm_unreachable("Unhandled GPU!"); 194 } 195 } 196 197 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 198 } 199 200 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 201 TargetOptions &TargetOpts) const { 202 bool hasFP32Denormals = false; 203 bool hasFP64Denormals = false; 204 GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU); 205 for (auto &I : TargetOpts.FeaturesAsWritten) { 206 if (I == "+fp32-denormals" || I == "-fp32-denormals") 207 hasFP32Denormals = true; 208 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 209 hasFP64Denormals = true; 210 } 211 if (!hasFP32Denormals) 212 TargetOpts.Features.push_back( 213 (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm 214 ? '+' 215 : '-') + 216 Twine("fp32-denormals")) 217 .str()); 218 // Always do not flush fp64 or fp16 denorms. 219 if (!hasFP64Denormals && CGOptsGPU.HasFP64) 220 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 221 } 222 223 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU; 224 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[]; 225 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[]; 226 227 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) { 228 const auto *Result = llvm::find_if( 229 R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); 230 231 if (Result == std::end(R600GPUs)) 232 return InvalidGPU; 233 return *Result; 234 } 235 236 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 237 const auto *Result = llvm::find_if( 238 AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); 239 240 if (Result == std::end(AMDGCNGPUs)) 241 return InvalidGPU; 242 return *Result; 243 } 244 245 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const { 246 if (isAMDGCN(getTriple())) 247 return parseAMDGCNName(Name); 248 else 249 return parseR600Name(Name); 250 } 251 252 void AMDGPUTargetInfo::fillValidCPUList( 253 SmallVectorImpl<StringRef> &Values) const { 254 if (isAMDGCN(getTriple())) 255 llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) { 256 Values.emplace_back(GPU.Name);}); 257 else 258 llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) { 259 Values.emplace_back(GPU.Name);}); 260 } 261 262 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 263 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 264 } 265 266 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 267 const TargetOptions &Opts) 268 : TargetInfo(Triple), 269 GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) { 270 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 271 : DataLayoutStringR600); 272 assert(DataLayout->getAllocaAddrSpace() == Private); 273 274 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 275 !isAMDGCN(Triple)); 276 UseAddrSpaceMapMangling = true; 277 278 // Set pointer width and alignment for target address space 0. 279 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 280 if (getMaxPointerWidth() == 64) { 281 LongWidth = LongAlign = 64; 282 SizeType = UnsignedLong; 283 PtrDiffType = SignedLong; 284 IntPtrType = SignedLong; 285 } 286 287 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 288 } 289 290 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 291 TargetInfo::adjust(Opts); 292 // ToDo: There are still a few places using default address space as private 293 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 294 // can be removed from the following line. 295 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 296 !isAMDGCN(getTriple())); 297 } 298 299 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 300 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 301 Builtin::FirstTSBuiltin); 302 } 303 304 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 305 MacroBuilder &Builder) const { 306 Builder.defineMacro("__AMD__"); 307 Builder.defineMacro("__AMDGPU__"); 308 309 if (isAMDGCN(getTriple())) 310 Builder.defineMacro("__AMDGCN__"); 311 else 312 Builder.defineMacro("__R600__"); 313 314 if (GPU.Kind != GK_NONE) 315 Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__")); 316 317 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 318 // removed in the near future. 319 if (GPU.HasFMAF) 320 Builder.defineMacro("__HAS_FMAF__"); 321 if (GPU.HasFastFMAF) 322 Builder.defineMacro("FP_FAST_FMAF"); 323 if (GPU.HasLDEXPF) 324 Builder.defineMacro("__HAS_LDEXPF__"); 325 if (GPU.HasFP64) 326 Builder.defineMacro("__HAS_FP64__"); 327 if (GPU.HasFastFMA) 328 Builder.defineMacro("FP_FAST_FMA"); 329 } 330