1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 34 35 static const char *const DataLayoutStringSIPrivateIsZero = 36 "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 39 40 static const char *const DataLayoutStringSIGenericIsZero = 41 "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32" 42 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 43 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; 44 45 static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = { 46 4, // Default 47 1, // opencl_global 48 3, // opencl_local 49 2, // opencl_constant 50 4, // opencl_generic 51 1, // cuda_device 52 2, // cuda_constant 53 3 // cuda_shared 54 }; 55 56 static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = { 57 0, // Default 58 1, // opencl_global 59 3, // opencl_local 60 2, // opencl_constant 61 0, // opencl_generic 62 1, // cuda_device 63 2, // cuda_constant 64 3 // cuda_shared 65 }; 66 67 static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = { 68 0, // Default 69 1, // opencl_global 70 3, // opencl_local 71 2, // opencl_constant 72 4, // opencl_generic 73 1, // cuda_device 74 2, // cuda_constant 75 3 // cuda_shared 76 }; 77 78 static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = { 79 5, // Default 80 1, // opencl_global 81 3, // opencl_local 82 2, // opencl_constant 83 0, // opencl_generic 84 1, // cuda_device 85 2, // cuda_constant 86 3 // cuda_shared 87 }; 88 } // namespace targets 89 } // namespace clang 90 91 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 92 #define BUILTIN(ID, TYPE, ATTRS) \ 93 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 94 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 95 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 96 #include "clang/Basic/BuiltinsAMDGPU.def" 97 }; 98 99 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 100 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 101 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 102 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 103 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 104 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 105 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 106 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 107 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 108 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 109 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 110 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 111 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 112 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 113 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 114 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 115 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 116 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 117 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 118 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 119 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 120 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 121 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 122 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 123 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 124 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 125 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 126 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 127 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 128 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 129 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 130 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 131 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 132 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 133 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 134 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 135 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 136 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 137 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 138 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 139 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 140 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 141 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 142 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 143 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 144 "flat_scratch_lo", "flat_scratch_hi" 145 }; 146 147 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 148 return llvm::makeArrayRef(GCCRegNames); 149 } 150 151 bool AMDGPUTargetInfo::initFeatureMap( 152 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 153 const std::vector<std::string> &FeatureVec) const { 154 155 // XXX - What does the member GPU mean if device name string passed here? 156 if (getTriple().getArch() == llvm::Triple::amdgcn) { 157 if (CPU.empty()) 158 CPU = "tahiti"; 159 160 switch (parseAMDGCNName(CPU)) { 161 case GK_GFX6: 162 case GK_GFX7: 163 break; 164 165 case GK_GFX9: 166 Features["gfx9-insts"] = true; 167 LLVM_FALLTHROUGH; 168 case GK_GFX8: 169 Features["s-memrealtime"] = true; 170 Features["16-bit-insts"] = true; 171 Features["dpp"] = true; 172 break; 173 174 case GK_NONE: 175 return false; 176 default: 177 llvm_unreachable("unhandled subtarget"); 178 } 179 } else { 180 if (CPU.empty()) 181 CPU = "r600"; 182 183 switch (parseR600Name(CPU)) { 184 case GK_R600: 185 case GK_R700: 186 case GK_EVERGREEN: 187 case GK_NORTHERN_ISLANDS: 188 break; 189 case GK_R600_DOUBLE_OPS: 190 case GK_R700_DOUBLE_OPS: 191 case GK_EVERGREEN_DOUBLE_OPS: 192 case GK_CAYMAN: 193 Features["fp64"] = true; 194 break; 195 case GK_NONE: 196 return false; 197 default: 198 llvm_unreachable("unhandled subtarget"); 199 } 200 } 201 202 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 203 } 204 205 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 206 TargetOptions &TargetOpts) const { 207 bool hasFP32Denormals = false; 208 bool hasFP64Denormals = false; 209 for (auto &I : TargetOpts.FeaturesAsWritten) { 210 if (I == "+fp32-denormals" || I == "-fp32-denormals") 211 hasFP32Denormals = true; 212 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 213 hasFP64Denormals = true; 214 } 215 if (!hasFP32Denormals) 216 TargetOpts.Features.push_back( 217 (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm 218 ? '+' 219 : '-') + 220 Twine("fp32-denormals")) 221 .str()); 222 // Always do not flush fp64 or fp16 denorms. 223 if (!hasFP64Denormals && hasFP64) 224 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 225 } 226 227 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) { 228 return llvm::StringSwitch<GPUKind>(Name) 229 .Case("r600", GK_R600) 230 .Case("rv610", GK_R600) 231 .Case("rv620", GK_R600) 232 .Case("rv630", GK_R600) 233 .Case("rv635", GK_R600) 234 .Case("rs780", GK_R600) 235 .Case("rs880", GK_R600) 236 .Case("rv670", GK_R600_DOUBLE_OPS) 237 .Case("rv710", GK_R700) 238 .Case("rv730", GK_R700) 239 .Case("rv740", GK_R700_DOUBLE_OPS) 240 .Case("rv770", GK_R700_DOUBLE_OPS) 241 .Case("palm", GK_EVERGREEN) 242 .Case("cedar", GK_EVERGREEN) 243 .Case("sumo", GK_EVERGREEN) 244 .Case("sumo2", GK_EVERGREEN) 245 .Case("redwood", GK_EVERGREEN) 246 .Case("juniper", GK_EVERGREEN) 247 .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS) 248 .Case("cypress", GK_EVERGREEN_DOUBLE_OPS) 249 .Case("barts", GK_NORTHERN_ISLANDS) 250 .Case("turks", GK_NORTHERN_ISLANDS) 251 .Case("caicos", GK_NORTHERN_ISLANDS) 252 .Case("cayman", GK_CAYMAN) 253 .Case("aruba", GK_CAYMAN) 254 .Default(GK_NONE); 255 } 256 257 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 258 return llvm::StringSwitch<GPUKind>(Name) 259 .Case("gfx600", GK_GFX6) 260 .Case("tahiti", GK_GFX6) 261 .Case("gfx601", GK_GFX6) 262 .Case("pitcairn", GK_GFX6) 263 .Case("verde", GK_GFX6) 264 .Case("oland", GK_GFX6) 265 .Case("hainan", GK_GFX6) 266 .Case("gfx700", GK_GFX7) 267 .Case("bonaire", GK_GFX7) 268 .Case("kaveri", GK_GFX7) 269 .Case("gfx701", GK_GFX7) 270 .Case("hawaii", GK_GFX7) 271 .Case("gfx702", GK_GFX7) 272 .Case("gfx703", GK_GFX7) 273 .Case("kabini", GK_GFX7) 274 .Case("mullins", GK_GFX7) 275 .Case("gfx800", GK_GFX8) 276 .Case("iceland", GK_GFX8) 277 .Case("gfx801", GK_GFX8) 278 .Case("carrizo", GK_GFX8) 279 .Case("gfx802", GK_GFX8) 280 .Case("tonga", GK_GFX8) 281 .Case("gfx803", GK_GFX8) 282 .Case("fiji", GK_GFX8) 283 .Case("polaris10", GK_GFX8) 284 .Case("polaris11", GK_GFX8) 285 .Case("gfx804", GK_GFX8) 286 .Case("gfx810", GK_GFX8) 287 .Case("stoney", GK_GFX8) 288 .Case("gfx900", GK_GFX9) 289 .Case("gfx901", GK_GFX9) 290 .Case("gfx902", GK_GFX9) 291 .Case("gfx903", GK_GFX9) 292 .Default(GK_NONE); 293 } 294 295 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 296 if (isGenericZero(getTriple())) { 297 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap 298 : &AMDGPUGenIsZeroDefIsGenMap; 299 } else { 300 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap 301 : &AMDGPUPrivIsZeroDefIsGenMap; 302 } 303 } 304 305 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 306 const TargetOptions &Opts) 307 : TargetInfo(Triple), GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600), 308 hasFP64(false), hasFMAF(false), hasLDEXPF(false), 309 AS(isGenericZero(Triple)) { 310 if (getTriple().getArch() == llvm::Triple::amdgcn) { 311 hasFP64 = true; 312 hasFMAF = true; 313 hasLDEXPF = true; 314 } 315 auto IsGenericZero = isGenericZero(Triple); 316 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn 317 ? (IsGenericZero ? DataLayoutStringSIGenericIsZero 318 : DataLayoutStringSIPrivateIsZero) 319 : DataLayoutStringR600); 320 assert(DataLayout->getAllocaAddrSpace() == AS.Private); 321 322 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 323 Triple.getEnvironment() == llvm::Triple::OpenCL || 324 Triple.getEnvironmentName() == "amdgizcl" || 325 !isAMDGCN(Triple)); 326 UseAddrSpaceMapMangling = true; 327 328 // Set pointer width and alignment for target address space 0. 329 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 330 if (getMaxPointerWidth() == 64) { 331 LongWidth = LongAlign = 64; 332 SizeType = UnsignedLong; 333 PtrDiffType = SignedLong; 334 IntPtrType = SignedLong; 335 } 336 337 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 338 } 339 340 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 341 TargetInfo::adjust(Opts); 342 setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple())); 343 } 344 345 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 346 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 347 Builtin::FirstTSBuiltin); 348 } 349 350 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 351 MacroBuilder &Builder) const { 352 if (getTriple().getArch() == llvm::Triple::amdgcn) 353 Builder.defineMacro("__AMDGCN__"); 354 else 355 Builder.defineMacro("__R600__"); 356 357 if (hasFMAF) 358 Builder.defineMacro("__HAS_FMAF__"); 359 if (hasLDEXPF) 360 Builder.defineMacro("__HAS_LDEXPF__"); 361 if (hasFP64) 362 Builder.defineMacro("__HAS_FP64__"); 363 } 364