1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements AMDGPU TargetInfo objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/MacroBuilder.h" 18 #include "clang/Basic/TargetBuiltins.h" 19 #include "clang/Frontend/CodeGenOptions.h" 20 #include "llvm/ADT/StringSwitch.h" 21 22 using namespace clang; 23 using namespace clang::targets; 24 25 namespace clang { 26 namespace targets { 27 28 // If you edit the description strings, make sure you update 29 // getPointerWidthV(). 30 31 static const char *const DataLayoutStringR600 = 32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; 34 35 static const char *const DataLayoutStringSIPrivateIsZero = 36 "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 39 40 static const char *const DataLayoutStringSIGenericIsZero = 41 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 42 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 43 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; 44 45 static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = { 46 4, // Default 47 1, // opencl_global 48 3, // opencl_local 49 4, // opencl_constant 50 0, // opencl_private 51 4, // opencl_generic 52 1, // cuda_device 53 4, // cuda_constant 54 3 // cuda_shared 55 }; 56 57 static const LangASMap AMDGPUGenIsZeroDefIsGenMap = { 58 0, // Default 59 1, // opencl_global 60 3, // opencl_local 61 4, // opencl_constant 62 5, // opencl_private 63 0, // opencl_generic 64 1, // cuda_device 65 4, // cuda_constant 66 3 // cuda_shared 67 }; 68 69 static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = { 70 0, // Default 71 1, // opencl_global 72 3, // opencl_local 73 4, // opencl_constant 74 0, // opencl_private 75 4, // opencl_generic 76 1, // cuda_device 77 4, // cuda_constant 78 3 // cuda_shared 79 }; 80 81 static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = { 82 5, // Default 83 1, // opencl_global 84 3, // opencl_local 85 4, // opencl_constant 86 5, // opencl_private 87 0, // opencl_generic 88 1, // cuda_device 89 4, // cuda_constant 90 3 // cuda_shared 91 }; 92 } // namespace targets 93 } // namespace clang 94 95 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 96 #define BUILTIN(ID, TYPE, ATTRS) \ 97 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 98 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 99 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 100 #include "clang/Basic/BuiltinsAMDGPU.def" 101 }; 102 103 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 104 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 105 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 106 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 107 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 108 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 109 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 110 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 111 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 112 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 113 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 114 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 115 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 116 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 117 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 118 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 119 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 120 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 121 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 122 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 123 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 124 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 125 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 126 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 127 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 128 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 129 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 130 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 131 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 132 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 133 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 134 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 135 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 136 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 137 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 138 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 139 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 140 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 141 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 142 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 143 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 144 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 145 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 146 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 147 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 148 "flat_scratch_lo", "flat_scratch_hi" 149 }; 150 151 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 152 return llvm::makeArrayRef(GCCRegNames); 153 } 154 155 bool AMDGPUTargetInfo::initFeatureMap( 156 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 157 const std::vector<std::string> &FeatureVec) const { 158 159 // XXX - What does the member GPU mean if device name string passed here? 160 if (getTriple().getArch() == llvm::Triple::amdgcn) { 161 if (CPU.empty()) 162 CPU = "tahiti"; 163 164 switch (parseAMDGCNName(CPU)) { 165 case GK_GFX6: 166 case GK_GFX7: 167 break; 168 169 case GK_GFX9: 170 Features["gfx9-insts"] = true; 171 LLVM_FALLTHROUGH; 172 case GK_GFX8: 173 Features["s-memrealtime"] = true; 174 Features["16-bit-insts"] = true; 175 Features["dpp"] = true; 176 break; 177 178 case GK_NONE: 179 return false; 180 default: 181 llvm_unreachable("unhandled subtarget"); 182 } 183 } else { 184 if (CPU.empty()) 185 CPU = "r600"; 186 187 switch (parseR600Name(CPU)) { 188 case GK_R600: 189 case GK_R700: 190 case GK_EVERGREEN: 191 case GK_NORTHERN_ISLANDS: 192 break; 193 case GK_R600_DOUBLE_OPS: 194 case GK_R700_DOUBLE_OPS: 195 case GK_EVERGREEN_DOUBLE_OPS: 196 case GK_CAYMAN: 197 // TODO: Add fp64 when implemented. 198 break; 199 case GK_NONE: 200 return false; 201 default: 202 llvm_unreachable("unhandled subtarget"); 203 } 204 } 205 206 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 207 } 208 209 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 210 TargetOptions &TargetOpts) const { 211 bool hasFP32Denormals = false; 212 bool hasFP64Denormals = false; 213 for (auto &I : TargetOpts.FeaturesAsWritten) { 214 if (I == "+fp32-denormals" || I == "-fp32-denormals") 215 hasFP32Denormals = true; 216 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 217 hasFP64Denormals = true; 218 } 219 if (!hasFP32Denormals) 220 TargetOpts.Features.push_back( 221 (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm 222 ? '+' 223 : '-') + 224 Twine("fp32-denormals")) 225 .str()); 226 // Always do not flush fp64 or fp16 denorms. 227 if (!hasFP64Denormals && hasFP64) 228 TargetOpts.Features.push_back("+fp64-fp16-denormals"); 229 } 230 231 232 constexpr AMDGPUTargetInfo::NameGPUKind AMDGPUTargetInfo::R600Names[]; 233 constexpr AMDGPUTargetInfo::NameGPUKind AMDGPUTargetInfo::AMDGCNNames[]; 234 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) { 235 const auto *Result = llvm::find_if( 236 R600Names, [Name](const NameGPUKind &Kind) { return Kind.Name == Name; }); 237 238 if (Result == std::end(R600Names)) 239 return GK_NONE; 240 return Result->Kind; 241 } 242 243 AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { 244 const auto *Result = 245 llvm::find_if(AMDGCNNames, [Name](const NameGPUKind &Kind) { 246 return Kind.Name == Name; 247 }); 248 249 if (Result == std::end(AMDGCNNames)) 250 return GK_NONE; 251 return Result->Kind; 252 } 253 254 void AMDGPUTargetInfo::fillValidCPUList( 255 SmallVectorImpl<StringRef> &Values) const { 256 if (getTriple().getArch() == llvm::Triple::amdgcn) 257 llvm::for_each(AMDGCNNames, [&Values](const NameGPUKind &Kind) { 258 Values.emplace_back(Kind.Name);}); 259 else 260 llvm::for_each(R600Names, [&Values](const NameGPUKind &Kind) { 261 Values.emplace_back(Kind.Name);}); 262 } 263 264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 265 if (isGenericZero(getTriple())) { 266 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap 267 : &AMDGPUGenIsZeroDefIsGenMap; 268 } else { 269 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap 270 : &AMDGPUPrivIsZeroDefIsGenMap; 271 } 272 } 273 274 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 275 const TargetOptions &Opts) 276 : TargetInfo(Triple), 277 GPU(isAMDGCN(Triple) ? GK_GFX6 : parseR600Name(Opts.CPU)), 278 hasFP64(false), hasFMAF(false), hasLDEXPF(false), 279 AS(isGenericZero(Triple)) { 280 if (getTriple().getArch() == llvm::Triple::amdgcn) { 281 hasFP64 = true; 282 hasFMAF = true; 283 hasLDEXPF = true; 284 } 285 if (getTriple().getArch() == llvm::Triple::r600) { 286 if (GPU == GK_EVERGREEN_DOUBLE_OPS || GPU == GK_CAYMAN) { 287 hasFMAF = true; 288 } 289 } 290 auto IsGenericZero = isGenericZero(Triple); 291 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn 292 ? (IsGenericZero ? DataLayoutStringSIGenericIsZero 293 : DataLayoutStringSIPrivateIsZero) 294 : DataLayoutStringR600); 295 assert(DataLayout->getAllocaAddrSpace() == AS.Private); 296 297 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 298 Triple.getEnvironment() == llvm::Triple::OpenCL || 299 Triple.getEnvironmentName() == "amdgizcl" || 300 !isAMDGCN(Triple)); 301 UseAddrSpaceMapMangling = true; 302 303 // Set pointer width and alignment for target address space 0. 304 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 305 if (getMaxPointerWidth() == 64) { 306 LongWidth = LongAlign = 64; 307 SizeType = UnsignedLong; 308 PtrDiffType = SignedLong; 309 IntPtrType = SignedLong; 310 } 311 312 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 313 } 314 315 void AMDGPUTargetInfo::adjust(LangOptions &Opts) { 316 TargetInfo::adjust(Opts); 317 setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple())); 318 } 319 320 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 321 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 322 Builtin::FirstTSBuiltin); 323 } 324 325 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 326 MacroBuilder &Builder) const { 327 if (getTriple().getArch() == llvm::Triple::amdgcn) 328 Builder.defineMacro("__AMDGCN__"); 329 else 330 Builder.defineMacro("__R600__"); 331 332 if (hasFMAF) 333 Builder.defineMacro("__HAS_FMAF__"); 334 if (hasLDEXPF) 335 Builder.defineMacro("__HAS_LDEXPF__"); 336 if (hasFP64) 337 Builder.defineMacro("__HAS_FP64__"); 338 } 339