1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 // XXX - What does the member GPU mean if device name string passed here? 131 if (isAMDGCN(getTriple())) { 132 if (CPU.empty()) 133 CPU = "gfx600"; 134 135 switch (parseAMDGCNName(CPU).Kind) { 136 case GK_GFX906: 137 Features["dl-insts"] = true; 138 LLVM_FALLTHROUGH; 139 case GK_GFX904: 140 case GK_GFX902: 141 case GK_GFX900: 142 Features["gfx9-insts"] = true; 143 LLVM_FALLTHROUGH; 144 case GK_GFX810: 145 case GK_GFX803: 146 case GK_GFX802: 147 case GK_GFX801: 148 Features["16-bit-insts"] = true; 149 Features["dpp"] = true; 150 Features["s-memrealtime"] = true; 151 break; 152 case GK_GFX704: 153 case GK_GFX703: 154 case GK_GFX702: 155 case GK_GFX701: 156 case GK_GFX700: 157 case GK_GFX601: 158 case GK_GFX600: 159 break; 160 case GK_NONE: 161 return false; 162 default: 163 llvm_unreachable("Unhandled GPU!"); 164 } 165 } else { 166 if (CPU.empty()) 167 CPU = "r600"; 168 169 switch (parseR600Name(CPU).Kind) { 170 case GK_CAYMAN: 171 case GK_CYPRESS: 172 case GK_RV770: 173 case GK_RV670: 174 // TODO: Add fp64 when implemented. 175 break; 176 case GK_TURKS: 177 case GK_CAICOS: 178 case GK_BARTS: 179 case GK_SUMO: 180 case GK_REDWOOD: 181 case GK_JUNIPER: 182 case GK_CEDAR: 183 case GK_RV730: 184 case GK_RV710: 185 case GK_RS880: 186 case GK_R630: 187 case GK_R600: 188 break; 189 default: 190 llvm_unreachable("Unhandled GPU!"); 191 } 192 } 193 194 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 195 } 196 197 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 198 TargetOptions &TargetOpts) const { 199 bool hasFP32Denormals = false; 200 bool hasFP64Denormals = false; 201 GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU); 202 for (auto &I : TargetOpts.FeaturesAsWritten) { 203 if (I == "+fp32-denormals" || I == "-fp32-denormals") 204 hasFP32Denormals = true; 205 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 206 hasFP64Denormals = true; 207 } 208 if (!hasFP32Denormals) 209 TargetOpts.Features.push_back( 210 (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm 211 ? '+' 212 : '-') + 213 Twine("fp32-denormals")) 214 .str()); 215 // Always do not flush fp64 or fp16 denorms. 216 if (!hasFP64Denormals && CGOptsGPU.HasFP64) 217 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 218 } 219 220 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU; 221 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[]; 222 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[]; 223 224 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) { 225 const auto *Result = llvm::find_if( 226 R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); 227 228 if (Result == std::end(R600GPUs)) 229 return InvalidGPU; 230 return *Result; 231 } 232 233 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 234 const auto *Result = llvm::find_if( 235 AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); 236 237 if (Result == std::end(AMDGCNGPUs)) 238 return InvalidGPU; 239 return *Result; 240 } 241 242 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const { 243 if (isAMDGCN(getTriple())) 244 return parseAMDGCNName(Name); 245 else 246 return parseR600Name(Name); 247 } 248 249 void AMDGPUTargetInfo::fillValidCPUList( 250 SmallVectorImpl<StringRef> &Values) const { 251 if (isAMDGCN(getTriple())) 252 llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) { 253 Values.emplace_back(GPU.Name);}); 254 else 255 llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) { 256 Values.emplace_back(GPU.Name);}); 257 } 258 259 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 260 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 261 } 262 263 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 264 const TargetOptions &Opts) 265 : TargetInfo(Triple), 266 GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) { 267 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 268 : DataLayoutStringR600); 269 assert(DataLayout->getAllocaAddrSpace() == Private); 270 271 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 272 !isAMDGCN(Triple)); 273 UseAddrSpaceMapMangling = true; 274 275 // Set pointer width and alignment for target address space 0. 276 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 277 if (getMaxPointerWidth() == 64) { 278 LongWidth = LongAlign = 64; 279 SizeType = UnsignedLong; 280 PtrDiffType = SignedLong; 281 IntPtrType = SignedLong; 282 } 283 284 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 285 } 286 287 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 288 TargetInfo::adjust(Opts); 289 // ToDo: There are still a few places using default address space as private 290 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 291 // can be removed from the following line. 292 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 293 !isAMDGCN(getTriple())); 294 } 295 296 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 297 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 298 Builtin::FirstTSBuiltin); 299 } 300 301 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 302 MacroBuilder &Builder) const { 303 Builder.defineMacro("__AMD__"); 304 Builder.defineMacro("__AMDGPU__"); 305 306 if (isAMDGCN(getTriple())) 307 Builder.defineMacro("__AMDGCN__"); 308 else 309 Builder.defineMacro("__R600__"); 310 311 if (GPU.Kind != GK_NONE) 312 Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__")); 313 314 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 315 // removed in the near future. 316 if (GPU.HasFMAF) 317 Builder.defineMacro("__HAS_FMAF__"); 318 if (GPU.HasFastFMAF) 319 Builder.defineMacro("FP_FAST_FMAF"); 320 if (GPU.HasLDEXPF) 321 Builder.defineMacro("__HAS_LDEXPF__"); 322 if (GPU.HasFP64) 323 Builder.defineMacro("__HAS_FP64__"); 324 if (GPU.HasFastFMA) 325 Builder.defineMacro("FP_FAST_FMA"); 326 } 327