1 //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements NVPTX TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "NVPTX.h" 14 #include "Targets.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/MacroBuilder.h" 17 #include "clang/Basic/TargetBuiltins.h" 18 #include "llvm/ADT/StringSwitch.h" 19 20 using namespace clang; 21 using namespace clang::targets; 22 23 const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = { 24 #define BUILTIN(ID, TYPE, ATTRS) \ 25 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 26 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ 27 {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, 28 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 29 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 30 #include "clang/Basic/BuiltinsNVPTX.def" 31 }; 32 33 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"}; 34 35 NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, 36 const TargetOptions &Opts, 37 unsigned TargetPointerWidth) 38 : TargetInfo(Triple) { 39 assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && 40 "NVPTX only supports 32- and 64-bit modes."); 41 42 PTXVersion = 32; 43 for (const StringRef Feature : Opts.FeaturesAsWritten) { 44 if (!Feature.startswith("+ptx")) 45 continue; 46 PTXVersion = llvm::StringSwitch<unsigned>(Feature) 47 .Case("+ptx70", 70) 48 .Case("+ptx65", 65) 49 .Case("+ptx64", 64) 50 .Case("+ptx63", 63) 51 .Case("+ptx61", 61) 52 .Case("+ptx60", 60) 53 .Case("+ptx50", 50) 54 .Case("+ptx43", 43) 55 .Case("+ptx42", 42) 56 .Case("+ptx41", 41) 57 .Case("+ptx40", 40) 58 .Case("+ptx32", 32) 59 .Default(32); 60 } 61 62 TLSSupported = false; 63 VLASupported = false; 64 AddrSpaceMap = &NVPTXAddrSpaceMap; 65 UseAddrSpaceMapMangling = true; 66 67 // Define available target features 68 // These must be defined in sorted order! 69 NoAsmVariants = true; 70 GPU = CudaArch::SM_20; 71 72 if (TargetPointerWidth == 32) 73 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 74 else if (Opts.NVPTXUseShortPointers) 75 resetDataLayout( 76 "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 77 else 78 resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); 79 80 // If possible, get a TargetInfo for our host triple, so we can match its 81 // types. 82 llvm::Triple HostTriple(Opts.HostTriple); 83 if (!HostTriple.isNVPTX()) 84 HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); 85 86 // If no host target, make some guesses about the data layout and return. 87 if (!HostTarget) { 88 LongWidth = LongAlign = TargetPointerWidth; 89 PointerWidth = PointerAlign = TargetPointerWidth; 90 switch (TargetPointerWidth) { 91 case 32: 92 SizeType = TargetInfo::UnsignedInt; 93 PtrDiffType = TargetInfo::SignedInt; 94 IntPtrType = TargetInfo::SignedInt; 95 break; 96 case 64: 97 SizeType = TargetInfo::UnsignedLong; 98 PtrDiffType = TargetInfo::SignedLong; 99 IntPtrType = TargetInfo::SignedLong; 100 break; 101 default: 102 llvm_unreachable("TargetPointerWidth must be 32 or 64"); 103 } 104 return; 105 } 106 107 // Copy properties from host target. 108 PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0); 109 PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0); 110 BoolWidth = HostTarget->getBoolWidth(); 111 BoolAlign = HostTarget->getBoolAlign(); 112 IntWidth = HostTarget->getIntWidth(); 113 IntAlign = HostTarget->getIntAlign(); 114 HalfWidth = HostTarget->getHalfWidth(); 115 HalfAlign = HostTarget->getHalfAlign(); 116 FloatWidth = HostTarget->getFloatWidth(); 117 FloatAlign = HostTarget->getFloatAlign(); 118 DoubleWidth = HostTarget->getDoubleWidth(); 119 DoubleAlign = HostTarget->getDoubleAlign(); 120 LongWidth = HostTarget->getLongWidth(); 121 LongAlign = HostTarget->getLongAlign(); 122 LongLongWidth = HostTarget->getLongLongWidth(); 123 LongLongAlign = HostTarget->getLongLongAlign(); 124 MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0); 125 NewAlign = HostTarget->getNewAlign(); 126 DefaultAlignForAttributeAligned = 127 HostTarget->getDefaultAlignForAttributeAligned(); 128 SizeType = HostTarget->getSizeType(); 129 IntMaxType = HostTarget->getIntMaxType(); 130 PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0); 131 IntPtrType = HostTarget->getIntPtrType(); 132 WCharType = HostTarget->getWCharType(); 133 WIntType = HostTarget->getWIntType(); 134 Char16Type = HostTarget->getChar16Type(); 135 Char32Type = HostTarget->getChar32Type(); 136 Int64Type = HostTarget->getInt64Type(); 137 SigAtomicType = HostTarget->getSigAtomicType(); 138 ProcessIDType = HostTarget->getProcessIDType(); 139 140 UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); 141 UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment(); 142 UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); 143 ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); 144 145 // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and 146 // we need those macros to be identical on host and device, because (among 147 // other things) they affect which standard library classes are defined, and 148 // we need all classes to be defined on both the host and device. 149 MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth(); 150 151 // Properties intentionally not copied from host: 152 // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the 153 // host/device boundary. 154 // - SuitableAlign: Not visible across the host/device boundary, and may 155 // correctly be different on host/device, e.g. if host has wider vector 156 // types than device. 157 // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same 158 // as its double type, but that's not necessarily true on the host. 159 // TODO: nvcc emits a warning when using long double on device; we should 160 // do the same. 161 } 162 163 ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { 164 return llvm::makeArrayRef(GCCRegNames); 165 } 166 167 bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { 168 return llvm::StringSwitch<bool>(Feature) 169 .Cases("ptx", "nvptx", true) 170 .Default(false); 171 } 172 173 void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, 174 MacroBuilder &Builder) const { 175 Builder.defineMacro("__PTX__"); 176 Builder.defineMacro("__NVPTX__"); 177 if (Opts.CUDAIsDevice) { 178 // Set __CUDA_ARCH__ for the GPU specified. 179 std::string CUDAArchCode = [this] { 180 switch (GPU) { 181 case CudaArch::GFX600: 182 case CudaArch::GFX601: 183 case CudaArch::GFX700: 184 case CudaArch::GFX701: 185 case CudaArch::GFX702: 186 case CudaArch::GFX703: 187 case CudaArch::GFX704: 188 case CudaArch::GFX801: 189 case CudaArch::GFX802: 190 case CudaArch::GFX803: 191 case CudaArch::GFX810: 192 case CudaArch::GFX900: 193 case CudaArch::GFX902: 194 case CudaArch::GFX904: 195 case CudaArch::GFX906: 196 case CudaArch::GFX908: 197 case CudaArch::GFX909: 198 case CudaArch::GFX1010: 199 case CudaArch::GFX1011: 200 case CudaArch::GFX1012: 201 case CudaArch::LAST: 202 break; 203 case CudaArch::UNKNOWN: 204 assert(false && "No GPU arch when compiling CUDA device code."); 205 return ""; 206 case CudaArch::SM_20: 207 return "200"; 208 case CudaArch::SM_21: 209 return "210"; 210 case CudaArch::SM_30: 211 return "300"; 212 case CudaArch::SM_32: 213 return "320"; 214 case CudaArch::SM_35: 215 return "350"; 216 case CudaArch::SM_37: 217 return "370"; 218 case CudaArch::SM_50: 219 return "500"; 220 case CudaArch::SM_52: 221 return "520"; 222 case CudaArch::SM_53: 223 return "530"; 224 case CudaArch::SM_60: 225 return "600"; 226 case CudaArch::SM_61: 227 return "610"; 228 case CudaArch::SM_62: 229 return "620"; 230 case CudaArch::SM_70: 231 return "700"; 232 case CudaArch::SM_72: 233 return "720"; 234 case CudaArch::SM_75: 235 return "750"; 236 case CudaArch::SM_80: 237 return "800"; 238 } 239 llvm_unreachable("unhandled CudaArch"); 240 }(); 241 Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); 242 } 243 } 244 245 ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const { 246 return llvm::makeArrayRef(BuiltinInfo, clang::NVPTX::LastTSBuiltin - 247 Builtin::FirstTSBuiltin); 248 } 249