1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34 35 static const char *const DataLayoutStringAMDGCN = 36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 39 40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 41 Generic, // Default 42 Global, // opencl_global 43 Local, // opencl_local 44 Constant, // opencl_constant 45 Private, // opencl_private 46 Generic, // opencl_generic 47 Global, // cuda_device 48 Constant, // cuda_constant 49 Local // cuda_shared 50 }; 51 52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 53 Private, // Default 54 Global, // opencl_global 55 Local, // opencl_local 56 Constant, // opencl_constant 57 Private, // opencl_private 58 Generic, // opencl_generic 59 Global, // cuda_device 60 Constant, // cuda_constant 61 Local // cuda_shared 62 }; 63 } // namespace targets 64 } // namespace clang 65 66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 67 #define BUILTIN(ID, TYPE, ATTRS) \ 68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 71 #include "clang/Basic/BuiltinsAMDGPU.def" 72 }; 73 74 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 119 "flat_scratch_lo", "flat_scratch_hi" 120 }; 121 122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 123 return llvm::makeArrayRef(GCCRegNames); 124 } 125 126 bool AMDGPUTargetInfo::initFeatureMap( 127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 128 const std::vector<std::string> &FeatureVec) const { 129 130 // XXX - What does the member GPU mean if device name string passed here? 131 if (isAMDGCN(getTriple())) { 132 if (CPU.empty()) 133 CPU = "gfx600"; 134 135 switch (parseAMDGCNName(CPU).Kind) { 136 case GK_GFX902: 137 case GK_GFX900: 138 Features["gfx9-insts"] = true; 139 LLVM_FALLTHROUGH; 140 case GK_GFX810: 141 case GK_GFX803: 142 case GK_GFX802: 143 case GK_GFX801: 144 Features["16-bit-insts"] = true; 145 Features["dpp"] = true; 146 Features["s-memrealtime"] = true; 147 break; 148 case GK_GFX704: 149 case GK_GFX703: 150 case GK_GFX702: 151 case GK_GFX701: 152 case GK_GFX700: 153 case GK_GFX601: 154 case GK_GFX600: 155 break; 156 case GK_NONE: 157 return false; 158 default: 159 llvm_unreachable("Unhandled GPU!"); 160 } 161 } else { 162 if (CPU.empty()) 163 CPU = "r600"; 164 165 switch (parseR600Name(CPU).Kind) { 166 case GK_CAYMAN: 167 case GK_CYPRESS: 168 case GK_RV770: 169 case GK_RV670: 170 // TODO: Add fp64 when implemented. 171 break; 172 case GK_TURKS: 173 case GK_CAICOS: 174 case GK_BARTS: 175 case GK_SUMO: 176 case GK_REDWOOD: 177 case GK_JUNIPER: 178 case GK_CEDAR: 179 case GK_RV730: 180 case GK_RV710: 181 case GK_RS880: 182 case GK_R630: 183 case GK_R600: 184 break; 185 default: 186 llvm_unreachable("Unhandled GPU!"); 187 } 188 } 189 190 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 191 } 192 193 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 194 TargetOptions &TargetOpts) const { 195 bool hasFP32Denormals = false; 196 bool hasFP64Denormals = false; 197 GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU); 198 for (auto &I : TargetOpts.FeaturesAsWritten) { 199 if (I == "+fp32-denormals" || I == "-fp32-denormals") 200 hasFP32Denormals = true; 201 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 202 hasFP64Denormals = true; 203 } 204 if (!hasFP32Denormals) 205 TargetOpts.Features.push_back( 206 (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm 207 ? '+' 208 : '-') + 209 Twine("fp32-denormals")) 210 .str()); 211 // Always do not flush fp64 or fp16 denorms. 212 if (!hasFP64Denormals && CGOptsGPU.HasFP64) 213 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 214 } 215 216 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU; 217 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[]; 218 constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[]; 219 220 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) { 221 const auto *Result = llvm::find_if( 222 R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); 223 224 if (Result == std::end(R600GPUs)) 225 return InvalidGPU; 226 return *Result; 227 } 228 229 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 230 const auto *Result = llvm::find_if( 231 AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); 232 233 if (Result == std::end(AMDGCNGPUs)) 234 return InvalidGPU; 235 return *Result; 236 } 237 238 AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const { 239 if (isAMDGCN(getTriple())) 240 return parseAMDGCNName(Name); 241 else 242 return parseR600Name(Name); 243 } 244 245 void AMDGPUTargetInfo::fillValidCPUList( 246 SmallVectorImpl<StringRef> &Values) const { 247 if (isAMDGCN(getTriple())) 248 llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) { 249 Values.emplace_back(GPU.Name);}); 250 else 251 llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) { 252 Values.emplace_back(GPU.Name);}); 253 } 254 255 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 256 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 257 } 258 259 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 260 const TargetOptions &Opts) 261 : TargetInfo(Triple), 262 GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) { 263 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 264 : DataLayoutStringR600); 265 assert(DataLayout->getAllocaAddrSpace() == Private); 266 267 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 268 !isAMDGCN(Triple)); 269 UseAddrSpaceMapMangling = true; 270 271 // Set pointer width and alignment for target address space 0. 272 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 273 if (getMaxPointerWidth() == 64) { 274 LongWidth = LongAlign = 64; 275 SizeType = UnsignedLong; 276 PtrDiffType = SignedLong; 277 IntPtrType = SignedLong; 278 } 279 280 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 281 } 282 283 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 284 TargetInfo::adjust(Opts); 285 // ToDo: There are still a few places using default address space as private 286 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 287 // can be removed from the following line. 288 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 289 !isAMDGCN(getTriple())); 290 } 291 292 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 293 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 294 Builtin::FirstTSBuiltin); 295 } 296 297 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 298 MacroBuilder &Builder) const { 299 Builder.defineMacro("__AMD__"); 300 Builder.defineMacro("__AMDGPU__"); 301 302 if (isAMDGCN(getTriple())) 303 Builder.defineMacro("__AMDGCN__"); 304 else 305 Builder.defineMacro("__R600__"); 306 307 if (GPU.Kind != GK_NONE) 308 Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__")); 309 310 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 311 // removed in the near future. 312 if (GPU.HasFMAF) 313 Builder.defineMacro("__HAS_FMAF__"); 314 if (GPU.HasFastFMAF) 315 Builder.defineMacro("FP_FAST_FMAF"); 316 if (GPU.HasLDEXPF) 317 Builder.defineMacro("__HAS_LDEXPF__"); 318 if (GPU.HasFP64) 319 Builder.defineMacro("__HAS_FP64__"); 320 if (GPU.HasFastFMA) 321 Builder.defineMacro("FP_FAST_FMA"); 322 } 323