1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 34 35 static const char *const DataLayoutStringSIPrivateIsZero = 36 "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 39 40 static const char *const DataLayoutStringSIGenericIsZero = 41 "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32" 42 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 43 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; 44 45 static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = { 46 4, // Default 47 1, // opencl_global 48 3, // opencl_local 49 2, // opencl_constant 50 0, // opencl_private 51 4, // opencl_generic 52 1, // cuda_device 53 2, // cuda_constant 54 3 // cuda_shared 55 }; 56 57 static const LangASMap AMDGPUGenIsZeroDefIsGenMap = { 58 0, // Default 59 1, // opencl_global 60 3, // opencl_local 61 2, // opencl_constant 62 5, // opencl_private 63 0, // opencl_generic 64 1, // cuda_device 65 2, // cuda_constant 66 3 // cuda_shared 67 }; 68 69 static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = { 70 0, // Default 71 1, // opencl_global 72 3, // opencl_local 73 2, // opencl_constant 74 0, // opencl_private 75 4, // opencl_generic 76 1, // cuda_device 77 2, // cuda_constant 78 3 // cuda_shared 79 }; 80 81 static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = { 82 5, // Default 83 1, // opencl_global 84 3, // opencl_local 85 2, // opencl_constant 86 5, // opencl_private 87 0, // opencl_generic 88 1, // cuda_device 89 2, // cuda_constant 90 3 // cuda_shared 91 }; 92 } // namespace targets 93 } // namespace clang 94 95 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 96 #define BUILTIN(ID, TYPE, ATTRS) \ 97 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 98 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 99 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 100 #include "clang/Basic/BuiltinsAMDGPU.def" 101 }; 102 103 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 104 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 105 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 106 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 107 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 108 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 109 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 110 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 111 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 112 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 113 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 114 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 115 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 116 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 117 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 118 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 119 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 120 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 121 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 122 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 123 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 124 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 125 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 126 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 127 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 128 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 129 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 130 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 131 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 132 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 133 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 134 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 135 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 136 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 137 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 138 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 139 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 140 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 141 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 142 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 143 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 144 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 145 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 146 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 147 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 148 "flat_scratch_lo", "flat_scratch_hi" 149 }; 150 151 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 152 return llvm::makeArrayRef(GCCRegNames); 153 } 154 155 bool AMDGPUTargetInfo::initFeatureMap( 156 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 157 const std::vector<std::string> &FeatureVec) const { 158 159 // XXX - What does the member GPU mean if device name string passed here? 160 if (getTriple().getArch() == llvm::Triple::amdgcn) { 161 if (CPU.empty()) 162 CPU = "tahiti"; 163 164 switch (parseAMDGCNName(CPU)) { 165 case GK_GFX6: 166 case GK_GFX7: 167 break; 168 169 case GK_GFX9: 170 Features["gfx9-insts"] = true; 171 LLVM_FALLTHROUGH; 172 case GK_GFX8: 173 Features["s-memrealtime"] = true; 174 Features["16-bit-insts"] = true; 175 Features["dpp"] = true; 176 break; 177 178 case GK_NONE: 179 return false; 180 default: 181 llvm_unreachable("unhandled subtarget"); 182 } 183 } else { 184 if (CPU.empty()) 185 CPU = "r600"; 186 187 switch (parseR600Name(CPU)) { 188 case GK_R600: 189 case GK_R700: 190 case GK_EVERGREEN: 191 case GK_NORTHERN_ISLANDS: 192 break; 193 case GK_R600_DOUBLE_OPS: 194 case GK_R700_DOUBLE_OPS: 195 case GK_EVERGREEN_DOUBLE_OPS: 196 case GK_CAYMAN: 197 Features["fp64"] = true; 198 break; 199 case GK_NONE: 200 return false; 201 default: 202 llvm_unreachable("unhandled subtarget"); 203 } 204 } 205 206 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 207 } 208 209 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 210 TargetOptions &TargetOpts) const { 211 bool hasFP32Denormals = false; 212 bool hasFP64Denormals = false; 213 for (auto &I : TargetOpts.FeaturesAsWritten) { 214 if (I == "+fp32-denormals" || I == "-fp32-denormals") 215 hasFP32Denormals = true; 216 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 217 hasFP64Denormals = true; 218 } 219 if (!hasFP32Denormals) 220 TargetOpts.Features.push_back( 221 (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm 222 ? '+' 223 : '-') + 224 Twine("fp32-denormals")) 225 .str()); 226 // Always do not flush fp64 or fp16 denorms. 227 if (!hasFP64Denormals && hasFP64) 228 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 229 } 230 231 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) { 232 return llvm::StringSwitch<GPUKind>(Name) 233 .Case("r600", GK_R600) 234 .Case("rv610", GK_R600) 235 .Case("rv620", GK_R600) 236 .Case("rv630", GK_R600) 237 .Case("rv635", GK_R600) 238 .Case("rs780", GK_R600) 239 .Case("rs880", GK_R600) 240 .Case("rv670", GK_R600_DOUBLE_OPS) 241 .Case("rv710", GK_R700) 242 .Case("rv730", GK_R700) 243 .Case("rv740", GK_R700_DOUBLE_OPS) 244 .Case("rv770", GK_R700_DOUBLE_OPS) 245 .Case("palm", GK_EVERGREEN) 246 .Case("cedar", GK_EVERGREEN) 247 .Case("sumo", GK_EVERGREEN) 248 .Case("sumo2", GK_EVERGREEN) 249 .Case("redwood", GK_EVERGREEN) 250 .Case("juniper", GK_EVERGREEN) 251 .Case("hemlock", GK_EVERGREEN_DOUBLE_OPS) 252 .Case("cypress", GK_EVERGREEN_DOUBLE_OPS) 253 .Case("barts", GK_NORTHERN_ISLANDS) 254 .Case("turks", GK_NORTHERN_ISLANDS) 255 .Case("caicos", GK_NORTHERN_ISLANDS) 256 .Case("cayman", GK_CAYMAN) 257 .Case("aruba", GK_CAYMAN) 258 .Default(GK_NONE); 259 } 260 261 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 262 return llvm::StringSwitch<GPUKind>(Name) 263 .Case("gfx600", GK_GFX6) 264 .Case("tahiti", GK_GFX6) 265 .Case("gfx601", GK_GFX6) 266 .Case("pitcairn", GK_GFX6) 267 .Case("verde", GK_GFX6) 268 .Case("oland", GK_GFX6) 269 .Case("hainan", GK_GFX6) 270 .Case("gfx700", GK_GFX7) 271 .Case("bonaire", GK_GFX7) 272 .Case("kaveri", GK_GFX7) 273 .Case("gfx701", GK_GFX7) 274 .Case("hawaii", GK_GFX7) 275 .Case("gfx702", GK_GFX7) 276 .Case("gfx703", GK_GFX7) 277 .Case("kabini", GK_GFX7) 278 .Case("mullins", GK_GFX7) 279 .Case("gfx800", GK_GFX8) 280 .Case("iceland", GK_GFX8) 281 .Case("gfx801", GK_GFX8) 282 .Case("carrizo", GK_GFX8) 283 .Case("gfx802", GK_GFX8) 284 .Case("tonga", GK_GFX8) 285 .Case("gfx803", GK_GFX8) 286 .Case("fiji", GK_GFX8) 287 .Case("polaris10", GK_GFX8) 288 .Case("polaris11", GK_GFX8) 289 .Case("gfx804", GK_GFX8) 290 .Case("gfx810", GK_GFX8) 291 .Case("stoney", GK_GFX8) 292 .Case("gfx900", GK_GFX9) 293 .Case("gfx901", GK_GFX9) 294 .Case("gfx902", GK_GFX9) 295 .Case("gfx903", GK_GFX9) 296 .Default(GK_NONE); 297 } 298 299 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 300 if (isGenericZero(getTriple())) { 301 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap 302 : &AMDGPUGenIsZeroDefIsGenMap; 303 } else { 304 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap 305 : &AMDGPUPrivIsZeroDefIsGenMap; 306 } 307 } 308 309 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 310 const TargetOptions &Opts) 311 : TargetInfo(Triple), 312 GPU(isAMDGCN(Triple) ? GK_GFX6 : parseR600Name(Opts.CPU)), 313 hasFP64(false), hasFMAF(false), hasLDEXPF(false), 314 AS(isGenericZero(Triple)) { 315 if (getTriple().getArch() == llvm::Triple::amdgcn) { 316 hasFP64 = true; 317 hasFMAF = true; 318 hasLDEXPF = true; 319 } 320 if (getTriple().getArch() == llvm::Triple::r600) { 321 if (GPU == GK_EVERGREEN_DOUBLE_OPS || GPU == GK_CAYMAN) { 322 hasFMAF = true; 323 } 324 } 325 auto IsGenericZero = isGenericZero(Triple); 326 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn 327 ? (IsGenericZero ? DataLayoutStringSIGenericIsZero 328 : DataLayoutStringSIPrivateIsZero) 329 : DataLayoutStringR600); 330 assert(DataLayout->getAllocaAddrSpace() == AS.Private); 331 332 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 333 Triple.getEnvironment() == llvm::Triple::OpenCL || 334 Triple.getEnvironmentName() == "amdgizcl" || 335 !isAMDGCN(Triple)); 336 UseAddrSpaceMapMangling = true; 337 338 // Set pointer width and alignment for target address space 0. 339 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 340 if (getMaxPointerWidth() == 64) { 341 LongWidth = LongAlign = 64; 342 SizeType = UnsignedLong; 343 PtrDiffType = SignedLong; 344 IntPtrType = SignedLong; 345 } 346 347 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 348 } 349 350 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 351 TargetInfo::adjust(Opts); 352 setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple())); 353 } 354 355 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 356 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 357 Builtin::FirstTSBuiltin); 358 } 359 360 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 361 MacroBuilder &Builder) const { 362 if (getTriple().getArch() == llvm::Triple::amdgcn) 363 Builder.defineMacro("__AMDGCN__"); 364 else 365 Builder.defineMacro("__R600__"); 366 367 if (hasFMAF) 368 Builder.defineMacro("__HAS_FMAF__"); 369 if (hasLDEXPF) 370 Builder.defineMacro("__HAS_LDEXPF__"); 371 if (hasFP64) 372 Builder.defineMacro("__HAS_FP64__"); 373 } 374