//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// LLVM data layout string for the R600 family: all pointers are 32-bit and
// alloca lives in address space 5 (see the "-A5" suffix; the constructor
// asserts this matches the Private address space).
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// LLVM data layout string for AMDGCN: the default (p) and p1/p4 pointers are
// 64-bit, while p2/p3/p5/p6 are 32-bit; alloca again lives in address
// space 5.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Language address space -> target address space map used when the
// language's "default" address space lowers to the Generic target AS
// (see setAddressSpaceMap below for when each map is selected).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};

// Same mapping, except the language "default" address space lowers to the
// Private target AS. Identical to the map above in every other entry.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
} // namespace targets
} // namespace clang

// Table of all AMDGPU target builtins, expanded from the X-macro list in
// BuiltinsAMDGPU.def. TARGET_BUILTIN entries additionally carry the
// subtarget feature string required to use the builtin.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};

// Register names exposed to the frontend (presumably for GCC-style inline
// asm constraints — see getGCCRegNames): the 256 VGPRs v0-v255, the 128
// SGPRs s0-s127, and the named special registers.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0",   "v1",   "v2",   "v3",   "v4",   "v5",   "v6",   "v7",
  "v8",   "v9",   "v10",  "v11",  "v12",  "v13",  "v14",  "v15",
  "v16",  "v17",  "v18",  "v19",  "v20",  "v21",  "v22",  "v23",
  "v24",  "v25",  "v26",  "v27",  "v28",  "v29",  "v30",  "v31",
  "v32",  "v33",  "v34",  "v35",  "v36",  "v37",  "v38",  "v39",
  "v40",  "v41",  "v42",  "v43",  "v44",  "v45",  "v46",  "v47",
  "v48",  "v49",  "v50",  "v51",  "v52",  "v53",  "v54",  "v55",
  "v56",  "v57",  "v58",  "v59",  "v60",  "v61",  "v62",  "v63",
  "v64",  "v65",  "v66",  "v67",  "v68",  "v69",  "v70",  "v71",
  "v72",  "v73",  "v74",  "v75",  "v76",  "v77",  "v78",  "v79",
  "v80",  "v81",  "v82",  "v83",  "v84",  "v85",  "v86",  "v87",
  "v88",  "v89",  "v90",  "v91",  "v92",  "v93",  "v94",  "v95",
  "v96",  "v97",  "v98",  "v99",  "v100", "v101", "v102", "v103",
  "v104", "v105", "v106", "v107", "v108", "v109", "v110", "v111",
  "v112", "v113", "v114", "v115", "v116", "v117", "v118", "v119",
  "v120", "v121", "v122", "v123", "v124", "v125", "v126", "v127",
  "v128", "v129", "v130", "v131", "v132", "v133", "v134", "v135",
  "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151",
  "v152", "v153", "v154", "v155", "v156", "v157", "v158", "v159",
  "v160", "v161", "v162", "v163", "v164", "v165", "v166", "v167",
  "v168", "v169", "v170", "v171", "v172", "v173", "v174", "v175",
  "v176", "v177", "v178", "v179", "v180", "v181", "v182", "v183",
  "v184", "v185", "v186", "v187", "v188", "v189", "v190", "v191",
  "v192", "v193", "v194", "v195", "v196", "v197", "v198", "v199",
  "v200", "v201", "v202", "v203", "v204", "v205", "v206", "v207",
  "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223",
  "v224", "v225", "v226", "v227", "v228", "v229", "v230", "v231",
  "v232", "v233", "v234", "v235", "v236", "v237", "v238", "v239",
  "v240", "v241", "v242", "v243", "v244", "v245", "v246", "v247",
  "v248", "v249", "v250", "v251", "v252", "v253", "v254", "v255",
  "s0",   "s1",   "s2",   "s3",   "s4",   "s5",   "s6",   "s7",
  "s8",   "s9",   "s10",  "s11",  "s12",  "s13",  "s14",  "s15",
  "s16",  "s17",  "s18",  "s19",  "s20",  "s21",  "s22",  "s23",
  "s24",  "s25",  "s26",  "s27",  "s28",  "s29",  "s30",  "s31",
  "s32",  "s33",  "s34",  "s35",  "s36",  "s37",  "s38",  "s39",
  "s40",  "s41",  "s42",  "s43",  "s44",  "s45",  "s46",  "s47",
  "s48",  "s49",  "s50",  "s51",  "s52",  "s53",  "s54",  "s55",
  "s56",  "s57",  "s58",  "s59",  "s60",  "s61",  "s62",  "s63",
  "s64",  "s65",  "s66",  "s67",  "s68",  "s69",  "s70",  "s71",
  "s72",  "s73",  "s74",  "s75",  "s76",  "s77",  "s78",  "s79",
  "s80",  "s81",  "s82",  "s83",  "s84",  "s85",  "s86",  "s87",
  "s88",  "s89",  "s90",  "s91",  "s92",  "s93",  "s94",  "s95",
  "s96",  "s97",  "s98",  "s99",  "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111",
  "s112", "s113", "s114", "s115", "s116", "s117", "s118", "s119",
  "s120", "s121", "s122", "s123", "s124", "s125", "s126", "s127",
  "exec", "vcc",  "scc",  "m0",   "flat_scratch", "exec_lo", "exec_hi",
  "vcc_lo", "vcc_hi", "flat_scratch_lo", "flat_scratch_hi"
};

ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  return llvm::makeArrayRef(GCCRegNames);
}

// Populate the default subtarget feature map for the given CPU name, then
// delegate to the base TargetInfo implementation to merge in FeatureVec.
// Returns false only for an unrecognized AMDGCN CPU name (GK_NONE).
bool AMDGPUTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeatureVec) const {

  // XXX - What does the member GPU mean if device name string passed here?
  if (isAMDGCN(getTriple())) {
    if (CPU.empty())
      CPU = "gfx600";

    // Later generations inherit the features of earlier ones via
    // fallthrough (gfx9 adds gfx9-insts on top of the gfx8 set).
    switch (parseAMDGCNName(CPU).Kind) {
    case GK_GFX902:
    case GK_GFX900:
      Features["gfx9-insts"] = true;
      LLVM_FALLTHROUGH;
    case GK_GFX810:
    case GK_GFX803:
    case GK_GFX802:
    case GK_GFX801:
      Features["16-bit-insts"] = true;
      Features["dpp"] = true;
      Features["s-memrealtime"] = true;
      break;
    case GK_GFX704:
    case GK_GFX703:
    case GK_GFX702:
    case GK_GFX701:
    case GK_GFX700:
    case GK_GFX601:
    case GK_GFX600:
      break;
    case GK_NONE:
      // Unknown AMDGCN CPU name: report failure to the caller.
      return false;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  } else {
    if (CPU.empty())
      CPU = "r600";

    // NOTE(review): an unrecognized R600 CPU name (GK_NONE) falls into the
    // llvm_unreachable default below; presumably invalid names are rejected
    // earlier (e.g. via isValidCPUName) — confirm.
    switch (parseR600Name(CPU).Kind) {
    case GK_CAYMAN:
    case GK_CYPRESS:
    case GK_RV770:
    case GK_RV670:
      // TODO: Add fp64 when implemented.
      break;
    case GK_TURKS:
    case GK_CAICOS:
    case GK_BARTS:
    case GK_SUMO:
    case GK_REDWOOD:
    case GK_JUNIPER:
    case GK_CEDAR:
    case GK_RV730:
    case GK_RV710:
    case GK_RS880:
    case GK_R630:
    case GK_R600:
      break;
    default:
      llvm_unreachable("Unhandled GPU!");
    }
  }

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
}

// Append default denormal-mode features for the target CPU unless the user
// already wrote +/-fp32-denormals or +/-fp64-fp16-denormals explicitly.
void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
                                           TargetOptions &TargetOpts) const {
  bool hasFP32Denormals = false;
  bool hasFP64Denormals = false;
  GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
  // Detect whether the user explicitly chose either denormal feature; an
  // explicit choice (enabled or disabled) suppresses the defaults below.
  for (auto &I : TargetOpts.FeaturesAsWritten) {
    if (I == "+fp32-denormals" || I == "-fp32-denormals")
      hasFP32Denormals = true;
    if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
      hasFP64Denormals = true;
  }
  // Default fp32 denormals on only when the GPU has fast FMAF and denormal
  // flushing was not requested (CGOpts.FlushDenorm); otherwise default off.
  if (!hasFP32Denormals)
    TargetOpts.Features.push_back(
        (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm ? '+' : '-') +
         Twine("fp32-denormals"))
            .str());
  // Always do not flush fp64 or fp16 denorms.
  if (!hasFP64Denormals && CGOptsGPU.HasFP64)
    TargetOpts.Features.push_back("+fp64-fp16-denormals");
}

// Out-of-line definitions for the static constexpr GPU tables (required
// prior to C++17 inline variables).
constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];

// Linear search of the R600 GPU table by exact name; returns InvalidGPU
// (Kind GK_NONE) when the name is not found.
AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
  const auto *Result = llvm::find_if(
      R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });

  if (Result == std::end(R600GPUs))
    return InvalidGPU;
  return *Result;
}

// Linear search of the AMDGCN GPU table by exact name; returns InvalidGPU
// (Kind GK_NONE) when the name is not found.
AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
  const auto *Result = llvm::find_if(
      AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });

  if (Result == std::end(AMDGCNGPUs))
    return InvalidGPU;
  return *Result;
}

// Dispatch to the parser for whichever GPU family the triple selects.
AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const {
  if (isAMDGCN(getTriple()))
    return parseAMDGCNName(Name);
  else
    return parseR600Name(Name);
}

// Append every known CPU name for this triple's GPU family to Values
// (used for -mcpu= diagnostics/help).
void AMDGPUTargetInfo::fillValidCPUList(
    SmallVectorImpl<StringRef> &Values) const {
  if (isAMDGCN(getTriple()))
    llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
      Values.emplace_back(GPU.Name);});
  else
    llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
      Values.emplace_back(GPU.Name);});
}

// Select which LangAS map is in effect: default-is-private vs
// default-is-generic (see the two map definitions at the top of the file).
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
}

AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                                   const TargetOptions &Opts)
    : TargetInfo(Triple),
      // For AMDGCN the member GPU defaults to the first table entry rather
      // than the requested CPU; only R600 parses Opts.CPU here.
      GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                        : DataLayoutStringR600);
  // Both data layout strings declare alloca in AS 5, which must be the
  // Private address space.
  assert(DataLayout->getAllocaAddrSpace() == Private);
  GCN_Subarch = CudaArch::GFX803; // Default to fiji

  // Mesa3D and all R600 targets use the default-is-private map.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                     !isAMDGCN(Triple));
  UseAddrSpaceMapMangling = true;

  // Set pointer width and alignment for target address space 0.
  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
  if (getMaxPointerWidth() == 64) {
    LongWidth = LongAlign = 64;
    SizeType = UnsignedLong;
    PtrDiffType = SignedLong;
    IntPtrType = SignedLong;
  }

  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}

// Re-select the address space map once language options are known (OpenCL
// forces the default-is-private map).
void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
  TargetInfo::adjust(Opts);
  // ToDo: There are still a few places using default address space as private
  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  // can be removed from the following line.
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
                     !isAMDGCN(getTriple()));
}

ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
                                             Builtin::FirstTSBuiltin);
}

// Define the target-specific preprocessor macros: family macros, the
// per-GPU __<canonical-name>__ macro, the CUDA arch macro when compiling
// device code, and capability macros derived from the GPU table.
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
  Builder.defineMacro("__AMD__");
  Builder.defineMacro("__AMDGPU__");

  if (isAMDGCN(getTriple()))
    Builder.defineMacro("__AMDGCN__");
  else
    Builder.defineMacro("__R600__");

  if (GPU.Kind != GK_NONE)
    Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));

  if (Opts.CUDAIsDevice)
    defineCudaArchMacro(GCN_Subarch, Builder);

  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  // removed in the near future.
  if (GPU.HasFMAF)
    Builder.defineMacro("__HAS_FMAF__");
  // FP_FAST_FMAF / FP_FAST_FMA are the standard <math.h> macro names
  // (intentionally without the __ wrapper used by the macros above).
  if (GPU.HasFastFMAF)
    Builder.defineMacro("FP_FAST_FMAF");
  if (GPU.HasLDEXPF)
    Builder.defineMacro("__HAS_LDEXPF__");
  if (GPU.HasFP64)
    Builder.defineMacro("__HAS_FP64__");
  if (GPU.HasFastFMA)
    Builder.defineMacro("FP_FAST_FMA");
}