//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===------------------------------------------------------------===//

include "llvm/Target/Target.td"

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every def in this section instantiates SubtargetFeature with four string
// arguments (FeatureFP64Denormals further down adds a fifth, a list of other
// features). NOTE(review): the arguments look like feature name, subtarget
// attribute, value assigned to that attribute, and help text -- confirm
// against the SubtargetFeature class in llvm/Target/Target.td.

def FeatureFP64 : SubtargetFeature<"fp64",
  "FP64",
  "true",
  "Enable double precision operations"
>;

def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  "FastFMAF32",
  "true",
  "Assuming f32 fma is at least as fast as mul + add"
>;

// Note: unlike its neighbours this def has no "Feature" prefix. The name is
// left untouched because other .td files may refer to it.
def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
  "HalfRate64Ops",
  "true",
  "Most fp64 instructions are half rate instead of quarter"
>;

// The only feature in this section whose value argument is "false".
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
  "R600ALUInst",
  "false",
  "Older version of ALU instructions encoding"
>;

def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
  "HasVertexCache",
  "true",
  "Specify use of dedicated vertex cache"
>;

def FeatureCaymanISA : SubtargetFeature<"caymanISA",
  "CaymanISA",
  "true",
  "Use Cayman ISA"
>;

def FeatureCFALUBug : SubtargetFeature<"cfalubug",
  "CFALUBug",
  "true",
  "GPU has CF_ALU bug"
>;

def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
  "FlatAddressSpace",
  "true",
  "Support flat address space"
>;

def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
  "UnalignedBufferAccess",
  "true",
  "Support unaligned global loads and stores"
>;

def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
  "UnalignedScratchAccess",
  "true",
  "Support unaligned scratch loads and stores"
>;

def FeatureXNACK : SubtargetFeature<"xnack",
  "EnableXNACK",
  "true",
  "Enable XNACK support"
>;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
  "SGPRInitBug",
  "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;

// Fetch-limit features. Value is a string: it is pasted onto "fetch" to form
// the feature name, passed through unchanged as the third argument, and
// appended to the description.
class SubtargetFeatureFetchLimit <string Value> :
  SubtargetFeature <"fetch"#Value,
  "TexVTXClauseSize",
  Value,
  "Limit the maximum number of fetches in a clause to "#Value
>;

def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;

// Wavefront-size features. Value is an int: it is pasted onto
// "wavefrontsize" for the feature name and converted with !cast<string> for
// the third argument.
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
  "wavefrontsize"#Value,
  "WavefrontSize",
  !cast<string>(Value),
  "The number of threads per wavefront"
>;

def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;

// LDS bank count features, built the same way as the wavefront-size ones.
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
  "ldsbankcount"#Value,
  "LDSBankCount",
  !cast<string>(Value),
  "The number of LDS banks per compute unit."
>;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

// ISA version features. The three ints are joined with "." for the feature
// name ("isaver7.0.0") and with "_" for the third argument
// ("ISAVersion7_0_0").
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
                                 : SubtargetFeature <
  "isaver"#Major#"."#Minor#"."#Stepping,
  "IsaVersion",
  "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
  "Instruction set version number"
>;

def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2>;
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;

// Local memory size features; Value is the size in bytes according to the
// description string. This class is instantiated later in the file, not here.
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
  !cast<string>(Value),
  "The size of local memory in bytes"
>;

def FeatureGCN : SubtargetFeature<"gcn",
  "IsGCN",
  "true",
  "GCN or newer GPU"
>;

def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
  "GCN1Encoding",
  "true",
  "Encoding format for SI and CI"
>;

def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
  "GCN3Encoding",
  "true",
  "Encoding format for VI"
>;

def FeatureCIInsts : SubtargetFeature<"ci-insts",
  "CIInsts",
  "true",
  "Additional instructions for CI+"
>;

def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
  "HasSMemRealTime",
  "true",
  "Has s_memrealtime instruction"
>;

def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
  "Has16BitInsts",
  "true",
  "Has i16/f16 instructions"
>;

def FeatureMovrel : SubtargetFeature<"movrel",
  "HasMovrel",
  "true",
  "Has v_movrel*_b32 instructions"
>;

def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
  "HasVGPRIndexMode",
  "true",
  "Has VGPR mode register indexing"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
  "FP32Denormals",
  "true",
  "Enable single precision denormal handling"
>;

// Passes [FeatureFP64] as an additional fifth argument.
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
  "FP64Denormals",
  "true",
  "Enable double precision denormal handling",
  [FeatureFP64]
>;

def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
  "FPExceptions",
  "true",
  "Enable floating point exceptions"
>;

// Maximum private element size features. size is an int that is pasted onto
// the feature name and the description, and converted with !cast<string> for
// the third argument.
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
  "max-private-element-size-"#size,
  "MaxPrivateElementSize",
  !cast<string>(size),
  "Maximum private access size may be "#size
>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
  "EnableVGPRSpilling",
  "true",
  "Enable spilling of VGPRs to scratch memory"
>;

// "DumpCode" and "dumpcode" are two spellings of the same option: both defs
// pass the same "DumpCode" second argument.
def FeatureDumpCode : SubtargetFeature <"DumpCode",
  "DumpCode",
  "true",
  "Dump MachineInstrs in the CodeEmitter"
>;

def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
  "DumpCode",
  "true",
  "Dump MachineInstrs in the CodeEmitter"
>;

def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
  "EnablePromoteAlloca",
  "true",
  "Enable promote alloca pass"
>;

// XXX - This should probably be
// removed once enabled by default
def FeatureEnableLoadStoreOpt
  : SubtargetFeature<"load-store-opt", "EnableLoadStoreOpt", "true",
                     "Enable SI load/store optimizer pass">;

// Performance debugging knob. Permits DS instruction immediate offsets
// even when the base pointer cannot be proven to be a base. On SI, base
// pointer values that would not give the same result as a 16-bit add are
// unsafe to fold; enabling this overrides that conservative base pointer
// test.
def FeatureEnableUnsafeDSOffsetFolding
  : SubtargetFeature<"unsafe-ds-offset-folding",
                     "EnableUnsafeDSOffsetFolding", "true",
                     "Force using DS instruction immediate offsets on SI">;

def FeatureEnableSIScheduler
  : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true",
                     "Enable SI Machine Scheduler">;

def FeatureFlatForGlobal
  : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true",
                     "Force to generate flat instruction for global">;

// Dummy feature used to disable assembler instructions.
def FeatureDisable
  : SubtargetFeature<"", "FeatureDisable", "true",
                     "Dummy feature to disable assembler instructions">;

// GPU generation features. Value serves as the feature name, is appended to
// "AMDGPUSubtarget::" for the third argument and is prefixed to
// " GPU generation" for the description. Implies is forwarded as the fifth
// argument.
class SubtargetFeatureGeneration<string Value,
                                 list<SubtargetFeature> Implies>
  : SubtargetFeature<Value, "Gen", "AMDGPUSubtarget::"#Value,
                     Value#" GPU generation", Implies>;

// Local memory sizes referenced by the generation features below.
def FeatureLocalMemorySize0     : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;

def FeatureR600 : SubtargetFeatureGeneration<"R600", [
  FeatureR600ALUInst,
  FeatureFetchLimit8,
  FeatureLocalMemorySize0
]>;

def FeatureR700 : SubtargetFeatureGeneration<"R700", [
  FeatureFetchLimit16,
  FeatureLocalMemorySize0
]>;

def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", [
  FeatureFetchLimit16,
  FeatureLocalMemorySize32768
]>;

def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", [
  FeatureFetchLimit16,
  FeatureWavefrontSize64,
  FeatureLocalMemorySize32768
]>;

def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [
  FeatureFP64,
  FeatureLocalMemorySize32768,
  FeatureWavefrontSize64,
  FeatureGCN,
  FeatureGCN1Encoding,
  FeatureLDSBankCount32,
  FeatureMovrel
]>;

def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", [
  FeatureFP64,
  FeatureLocalMemorySize65536,
  FeatureWavefrontSize64,
  FeatureGCN,
  FeatureFlatAddressSpace,
  FeatureGCN1Encoding,
  FeatureCIInsts,
  FeatureMovrel
]>;

def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [
  FeatureFP64,
  FeatureLocalMemorySize65536,
  FeatureWavefrontSize64,
  FeatureFlatAddressSpace,
  FeatureGCN,
  FeatureGCN3Encoding,
  FeatureCIInsts,
  Feature16BitInsts,
  FeatureSMemRealTime,
  FeatureVGPRIndexMode,
  FeatureMovrel
]>;

//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//

def FeatureDebuggerInsertNops : SubtargetFeature<
  "amdgpu-debugger-insert-nops", "DebuggerInsertNops", "true",
  "Insert one nop instruction for each high level source statement">;

def FeatureDebuggerReserveRegs : SubtargetFeature<
  "amdgpu-debugger-reserve-regs", "DebuggerReserveRegs", "true",
  "Reserve registers for debugger usage">;

def FeatureDebuggerEmitPrologue : SubtargetFeature<
  "amdgpu-debugger-emit-prologue", "DebuggerEmitPrologue", "true",
  "Emit debugger prologue">;

//===----------------------------------------------------------------------===//

def AMDGPUInstrInfo : InstrInfo {
  let guessInstructionProperties = 1;
  let noNamedPositionallyEncodedOperands = 1;
}

def AMDGPUAsmParser : AsmParser {
  // Register-name matching is switched off because it crashes: several R600
  // registers share an asm name, e.g. T0_XYZW and T0_XY are both "T0".
  let ShouldEmitMatchRegisterName = 0;
}

def AMDGPUAsmWriter : AsmWriter {
  int PassSubtarget = 1;
}

// Name and numeric ID for each assembler variant. The AsmParserVariant defs
// below read these fields. No AsmParserVariant in this file uses the
// Disable entry.
def AMDGPUAsmVariants {
  string Default = "Default";
  int Default_ID = 0;

  string VOP3 = "VOP3";
  int VOP3_ID = 1;

  string SDWA = "SDWA";
  int SDWA_ID = 2;

  string DPP = "DPP";
  int DPP_ID = 3;

  string Disable = "Disable";
  int Disable_ID = 4;
}

def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.Default_ID;
  let Name    = AMDGPUAsmVariants.Default;
}

def VOP3AsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.VOP3_ID;
  let Name    = AMDGPUAsmVariants.VOP3;
}

def SDWAAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.SDWA_ID;
  let Name    = AMDGPUAsmVariants.SDWA;
}

def DPPAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.DPP_ID;
  let Name    = AMDGPUAsmVariants.DPP;
}

// Top-level target record: ties together the instruction info, the
// assembly parser with its four variants, and the assembly writer.
def AMDGPU : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDGPUInstrInfo;
  let AssemblyParsers = [AMDGPUAsmParser];
  let AssemblyParserVariants = [
    DefaultAMDGPUAsmParserVariant,
    VOP3AsmParserVariant,
    SDWAAsmParserVariant,
    DPPAsmParserVariant
  ];
  let AssemblyWriters = [AMDGPUAsmWriter];
}

// Dummy Instruction itineraries for pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;

//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//

def TruePredicate : Predicate<"true">;

// The condition below is written as two adjacent string literals.
def isSICI
  : Predicate<
      "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
      "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<"FeatureGCN1Encoding">;

// Uses >= rather than ==, so it also holds for any generation that compares
// greater than VOLCANIC_ISLANDS.
def isVI
  : Predicate<
      "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<"FeatureGCN3Encoding">;

def isCIVI
  : Predicate<
      "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
      "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<"FeatureCIInsts">;

def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;

// Bundle of predicate fields. Predicates is computed as
// [SubtargetPredicate, AssemblerPredicate] followed by AssemblerPredicates
// and then OtherPredicates. SubtargetPredicate has no default here.
class PredicateControl {
  Predicate SubtargetPredicate;
  Predicate SIAssemblerPredicate = isSICI;
  Predicate VIAssemblerPredicate = isVI;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates =
    !listconcat([SubtargetPredicate, AssemblerPredicate],
                AssemblerPredicates,
                OtherPredicates);
}

// Include AMDGPU TD files
include "R600Schedule.td"
include "SISchedule.td"
include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"
include "AMDGPUCallingConv.td"