1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 34 35 static const char *const DataLayoutStringSIPrivateIsZero = 36 "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 39 40 static const char *const DataLayoutStringSIGenericIsZero = 41 "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32" 42 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 43 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; 44 45 static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = { 46 4, // Default 47 1, // opencl_global 48 3, // opencl_local 49 2, // opencl_constant 50 0, // opencl_private 51 4, // opencl_generic 52 1, // cuda_device 53 2, // cuda_constant 54 3 // cuda_shared 55 }; 56 57 static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = { 58 0, // Default 59 1, // opencl_global 60 3, // opencl_local 61 2, // opencl_constant 62 5, // opencl_private 63 0, // opencl_generic 64 1, // cuda_device 65 2, // cuda_constant 66 3 // cuda_shared 67 }; 68 69 static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = { 70 0, // Default 71 1, // opencl_global 72 3, // opencl_local 73 2, // opencl_constant 74 0, // opencl_private 75 4, // opencl_generic 76 1, // cuda_device 77 2, // cuda_constant 78 3 // cuda_shared 79 }; 80 81 static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = { 82 5, // Default 83 1, // opencl_global 84 3, // opencl_local 85 2, // opencl_constant 86 5, // opencl_private 87 0, // opencl_generic 88 1, // cuda_device 89 2, // cuda_constant 90 3 // cuda_shared 91 }; 92 } // namespace targets 93 } // namespace clang 94 95 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 96 #define BUILTIN(ID, TYPE, ATTRS) \ 97 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 98 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 99 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 100 #include "clang/Basic/BuiltinsAMDGPU.def" 101 }; 102 103 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 104 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 105 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 106 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 107 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 108 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 109 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 110 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 111 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 112 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 113 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 114 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 115 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 116 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 117 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 118 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 119 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 120 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 121 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 122 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 123 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 124 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 125 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 126 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 127 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 128 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 129 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 130 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 131 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 132 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 133 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 134 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 135 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 136 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 137 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 138 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 139 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 140 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 141 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 142 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 143 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 144 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 145 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 146 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 147 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 148 "flat_scratch_lo", "flat_scratch_hi" 149 }; 150 151 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 152 return llvm::makeArrayRef(GCCRegNames); 153 } 154 155 bool AMDGPUTargetInfo::initFeatureMap( 156 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 157 const std::vector<std::string> &FeatureVec) const { 158 159 // XXX - What does the member GPU mean if device name string passed here? 160 if (getTriple().getArch() == llvm::Triple::amdgcn) { 161 if (CPU.empty()) 162 CPU = "tahiti"; 163 164 switch (parseAMDGCNName(CPU)) { 165 case GK_GFX6: 166 case GK_GFX7: 167 break; 168 169 case GK_GFX9: 170 Features["gfx9-insts"] = true; 171 LLVM_FALLTHROUGH; 172 case GK_GFX8: 173 Features["s-memrealtime"] = true; 174 Features["16-bit-insts"] = true; 175 Features["dpp"] = true; 176 break; 177 178 case GK_NONE: 179 return false; 180 default: 181 llvm_unreachable("unhandled subtarget"); 182 } 183 } else { 184 if (CPU.empty()) 185 CPU = "r600"; 186 187 switch (parseR600Name(CPU)) { 188 case GK_R600: 189 case GK_R700: 190 case GK_EVERGREEN: 191 case GK_NORTHERN_ISLANDS: 192 break; 193 case GK_R600_DOUBLE_OPS: 194 case GK_R700_DOUBLE_OPS: 195 case GK_EVERGREEN_DOUBLE_OPS: 196 case GK_CAYMAN: 197 Features["fp64"] = true; 198 break; 199 case GK_NONE: 200 return false; 201 default: 202 llvm_unreachable("unhandled subtarget"); 203 } 204 } 205 206 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 207 } 208 209 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 210 TargetOptions &TargetOpts) const { 211 bool hasFP32Denormals = false; 212 bool hasFP64Denormals = false; 213 for (auto &I : TargetOpts.FeaturesAsWritten) { 214 if (I == "+fp32-denormals" || I == "-fp32-denormals") 215 hasFP32Denormals = true; 216 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 217 hasFP64Denormals = true; 218 } 219 if (!hasFP32Denormals) 220 TargetOpts.Features.push_back( 221 (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm 222 ? '+' 223 : '-') + 224 Twine("fp32-denormals")) 225 .str()); 226 // Always do not flush fp64 or fp16 denorms. 227 if (!hasFP64Denormals && hasFP64) 228 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 229 } 230 231 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) { 232 return llvm::StringSwitch<GPUKind>(Name) 233 .Case("r600", GK_R600) 234 .Case("rv610", GK_R600) 235 .Case("rv620", GK_R600) 236 .Case("rv630", GK_R600) 237 .Case("rv635", GK_R600) 238 .Case("rs780", GK_R600) 239 .Case("rs880", GK_R600) 240 .Case("rv670", GK_R600_DOUBLE_OPS) 241 .Case("rv710", GK_R700) 242 .Case("rv730", GK_R700) 243 .Case("rv740", GK_R700_DOUBLE_OPS) 244 .Case("rv770", GK_R700_DOUBLE_OPS) 245 .Case("palm", GK_EVERGREEN) 246 .Case("cedar", GK_EVERGREEN) 247 .Case("sumo", GK_EVERGREEN) 248 .Case("sumo2", GK_EVERGREEN) 249 .Case("redwood", GK_EVERGREEN) 250 .Case("juniper", GK_EVERGREEN) 251 .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS) 252 .Case("cypress", GK_EVERGREEN_DOUBLE_OPS) 253 .Case("barts", GK_NORTHERN_ISLANDS) 254 .Case("turks", GK_NORTHERN_ISLANDS) 255 .Case("caicos", GK_NORTHERN_ISLANDS) 256 .Case("cayman", GK_CAYMAN) 257 .Case("aruba", GK_CAYMAN) 258 .Default(GK_NONE); 259 } 260 261 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 262 return llvm::StringSwitch<GPUKind>(Name) 263 .Case("gfx600", GK_GFX6) 264 .Case("tahiti", GK_GFX6) 265 .Case("gfx601", GK_GFX6) 266 .Case("pitcairn", GK_GFX6) 267 .Case("verde", GK_GFX6) 268 .Case("oland", GK_GFX6) 269 .Case("hainan", GK_GFX6) 270 .Case("gfx700", GK_GFX7) 271 .Case("bonaire", GK_GFX7) 272 .Case("kaveri", GK_GFX7) 273 .Case("gfx701", GK_GFX7) 274 .Case("hawaii", GK_GFX7) 275 .Case("gfx702", GK_GFX7) 276 .Case("gfx703", GK_GFX7) 277 .Case("kabini", GK_GFX7) 278 .Case("mullins", GK_GFX7) 279 .Case("gfx800", GK_GFX8) 280 .Case("iceland", GK_GFX8) 281 .Case("gfx801", GK_GFX8) 282 .Case("carrizo", GK_GFX8) 283 .Case("gfx802", GK_GFX8) 284 .Case("tonga", GK_GFX8) 285 .Case("gfx803", GK_GFX8) 286 .Case("fiji", GK_GFX8) 287 .Case("polaris10", GK_GFX8) 288 .Case("polaris11", GK_GFX8) 289 .Case("gfx804", GK_GFX8) 290 .Case("gfx810", GK_GFX8) 291 .Case("stoney", GK_GFX8) 292 .Case("gfx900", GK_GFX9) 293 .Case("gfx901", GK_GFX9) 294 .Case("gfx902", GK_GFX9) 295 .Case("gfx903", GK_GFX9) 296 .Default(GK_NONE); 297 } 298 299 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 300 if (isGenericZero(getTriple())) { 301 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap 302 : &AMDGPUGenIsZeroDefIsGenMap; 303 } else { 304 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap 305 : &AMDGPUPrivIsZeroDefIsGenMap; 306 } 307 } 308 309 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 310 const TargetOptions &Opts) 311 : TargetInfo(Triple), GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600), 312 hasFP64(false), hasFMAF(false), hasLDEXPF(false), 313 AS(isGenericZero(Triple)) { 314 if (getTriple().getArch() == llvm::Triple::amdgcn) { 315 hasFP64 = true; 316 hasFMAF = true; 317 hasLDEXPF = true; 318 } 319 auto IsGenericZero = isGenericZero(Triple); 320 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn 321 ? (IsGenericZero ? DataLayoutStringSIGenericIsZero 322 : DataLayoutStringSIPrivateIsZero) 323 : DataLayoutStringR600); 324 assert(DataLayout->getAllocaAddrSpace() == AS.Private); 325 326 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 327 Triple.getEnvironment() == llvm::Triple::OpenCL || 328 Triple.getEnvironmentName() == "amdgizcl" || 329 !isAMDGCN(Triple)); 330 UseAddrSpaceMapMangling = true; 331 332 // Set pointer width and alignment for target address space 0. 333 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 334 if (getMaxPointerWidth() == 64) { 335 LongWidth = LongAlign = 64; 336 SizeType = UnsignedLong; 337 PtrDiffType = SignedLong; 338 IntPtrType = SignedLong; 339 } 340 341 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 342 } 343 344 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 345 TargetInfo::adjust(Opts); 346 setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple())); 347 } 348 349 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 350 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 351 Builtin::FirstTSBuiltin); 352 } 353 354 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 355 MacroBuilder &Builder) const { 356 if (getTriple().getArch() == llvm::Triple::amdgcn) 357 Builder.defineMacro("__AMDGCN__"); 358 else 359 Builder.defineMacro("__R600__"); 360 361 if (hasFMAF) 362 Builder.defineMacro("__HAS_FMAF__"); 363 if (hasLDEXPF) 364 Builder.defineMacro("__HAS_LDEXPF__"); 365 if (hasFP64) 366 Builder.defineMacro("__HAS_FP64__"); 367 } 368