//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===------------------------------------------------------------===//

include "llvm/Target/Target.td"

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every SubtargetFeature in this file is written as
//   <feature name, subtarget attribute, value assigned to the attribute,
//    description [, list of implied features]>.

def FeatureFP64 : SubtargetFeature<"fp64",
  "FP64",
  "true",
  "Enable double precision operations"
>;

def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  "FastFMAF32",
  "true",
  "Assuming f32 fma is at least as fast as mul + add"
>;

def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
  "HalfRate64Ops",
  "true",
  "Most fp64 instructions are half rate instead of quarter"
>;

def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
  "R600ALUInst",
  "false",
  "Older version of ALU instructions encoding"
>;

def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
  "HasVertexCache",
  "true",
  "Specify use of dedicated vertex cache"
>;

def FeatureCaymanISA : SubtargetFeature<"caymanISA",
  "CaymanISA",
  "true",
  "Use Cayman ISA"
>;

def FeatureCFALUBug : SubtargetFeature<"cfalubug",
  "CFALUBug",
  "true",
  "GPU has CF_ALU bug"
>;

def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
  "FlatAddressSpace",
  "true",
  "Support flat address space"
>;

def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
  "UnalignedBufferAccess",
  "true",
  "Support unaligned global loads and stores"
>;

def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
  "UnalignedScratchAccess",
  "true",
  "Support unaligned scratch loads and stores"
>;

// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
// - compute ring: XNACK enabled
//
// If XNACK is enabled, the VMEM latency can be worse.
// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<"xnack",
  "EnableXNACK",
  "true",
  "Enable XNACK support"
>;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
  "SGPRInitBug",
  "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;

// Feature "fetch<Value>": sets TexVTXClauseSize to Value.
class SubtargetFeatureFetchLimit <string Value> :
  SubtargetFeature <"fetch"#Value,
  "TexVTXClauseSize",
  Value,
  "Limit the maximum number of fetches in a clause to "#Value
>;

def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;

// Feature "wavefrontsize<Value>": sets WavefrontSize to Value.
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
  "wavefrontsize"#Value,
  "WavefrontSize",
  !cast<string>(Value),
  "The number of threads per wavefront"
>;

def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;

// Feature "ldsbankcount<Value>": sets LDSBankCount to Value.
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
  "ldsbankcount"#Value,
  "LDSBankCount",
  !cast<string>(Value),
  "The number of LDS banks per compute unit."
>;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

// Feature "localmemorysize<Value>": sets LocalMemorySize to Value.
// The concrete sizes are instantiated further down, next to the generation
// features that imply them.
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
  !cast<string>(Value),
  "The size of local memory in bytes"
>;

def FeatureGCN : SubtargetFeature<"gcn",
  "IsGCN",
  "true",
  "GCN or newer GPU"
>;

def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
  "GCN1Encoding",
  "true",
  "Encoding format for SI and CI"
>;

def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
  "GCN3Encoding",
  "true",
  "Encoding format for VI"
>;

def FeatureCIInsts : SubtargetFeature<"ci-insts",
  "CIInsts",
  "true",
  "Additional instructions for CI+"
>;

def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
  "HasSMemRealTime",
  "true",
  "Has s_memrealtime instruction"
>;

def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
  "HasInv2PiInlineImm",
  "true",
  "Has 1 / (2 * pi) as inline immediate"
>;

def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
  "Has16BitInsts",
  "true",
  "Has i16/f16 instructions"
>;

def FeatureMovrel : SubtargetFeature<"movrel",
  "HasMovrel",
  "true",
  "Has v_movrel*_b32 instructions"
>;

def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
  "HasVGPRIndexMode",
  "true",
  "Has VGPR mode register indexing"
>;

def FeatureScalarStores : SubtargetFeature<"scalar-stores",
  "HasScalarStores",
  "true",
  "Has store scalar memory instructions"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
  "FP16Denormals",
  "true",
  "Enable half precision denormal handling"
>;

// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
  "FP32Denormals",
  "true",
  "Enable single precision denormal handling"
>;

// Implies FeatureFP64.
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
  "FP64Denormals",
  "true",
  "Enable double precision denormal handling",
  [FeatureFP64]
>;

def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
  "FPExceptions",
  "true",
  "Enable floating point exceptions"
>;

// Feature "max-private-element-size-<size>": sets MaxPrivateElementSize
// to size.
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
  "max-private-element-size-"#size,
  "MaxPrivateElementSize",
  !cast<string>(size),
  "Maximum private access size may be "#size
>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
  "EnableVGPRSpilling",
  "true",
  "Enable spilling of VGPRs to scratch memory"
>;

// "DumpCode" and "dumpcode" are two spellings of the same option: both
// features set the DumpCode attribute.
def FeatureDumpCode : SubtargetFeature <"DumpCode",
  "DumpCode",
  "true",
  "Dump MachineInstrs in the CodeEmitter"
>;

def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
  "DumpCode",
  "true",
  "Dump MachineInstrs in the CodeEmitter"
>;

def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
  "EnablePromoteAlloca",
  "true",
  "Enable promote alloca pass"
>;

// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
  "EnableLoadStoreOpt",
  "true",
  "Enable SI load/store optimizer pass"
>;

// Performance debugging feature. Allow using DS instruction immediate
// offsets even if the base pointer can't be proven to be base. On SI,
// base pointer values that won't give the same result as a 16-bit add
// are not safe to fold, but this will override the conservative test
// for the base pointer.
// NOTE(review): "proven to be base" reads oddly; possibly "proven to be
// positive" was intended - confirm against the DS offset legality check.
def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
  "unsafe-ds-offset-folding",
  "EnableUnsafeDSOffsetFolding",
  "true",
  "Force using DS instruction immediate offsets on SI"
>;

def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
  "EnableSIScheduler",
  "true",
  "Enable SI Machine Scheduler"
>;

def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
  "FlatForGlobal",
  "true",
  "Force to generate flat instruction for global"
>;

// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
  "FeatureDisable","true",
  "Dummy feature to disable assembler instructions"
>;

// Feature named after a GPU generation: sets the Gen attribute to
// AMDGPUSubtarget::<Value> and implies the features listed in Implies.
class SubtargetFeatureGeneration <string Value,
                                  list<SubtargetFeature> Implies> :
        SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
                          Value#" GPU generation", Implies>;

def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;

def FeatureR600 : SubtargetFeatureGeneration<"R600",
  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
>;

def FeatureR700 : SubtargetFeatureGeneration<"R700",
  [FeatureFetchLimit16, FeatureLocalMemorySize0]
>;

def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
  [FeatureFetchLimit16, FeatureLocalMemorySize32768]
>;

def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
  [FeatureFetchLimit16, FeatureWavefrontSize64,
   FeatureLocalMemorySize32768]
>;

def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize32768,
   FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
   FeatureLDSBankCount32, FeatureMovrel]
>;

def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
   FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
>;

def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
   FeatureScalarStores, FeatureInv2PiInlineImm
  ]
>;

// Feature "isaver<Major>.<Minor>.<Stepping>": sets IsaVersion to
// ISAVersion<Major>_<Minor>_<Stepping> and implies the features listed in
// Implies.
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
                                  list<SubtargetFeature> Implies>
                                 : SubtargetFeature <
  "isaver"#Major#"."#Minor#"."#Stepping,
  "IsaVersion",
  "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
  "Instruction set version number",
  Implies
>;

def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
  [FeatureSeaIslands,
   FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
  [FeatureSeaIslands,
   HalfRate64Ops,
   FeatureLDSBankCount32,
   FeatureFastFMAF32]>;

def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
  [FeatureSeaIslands,
   FeatureLDSBankCount16]>;

def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32,
   FeatureSGPRInitBug]>;

def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32,
   FeatureXNACK]>;

def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32,
   FeatureSGPRInitBug]>;

def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32]>;

def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32]>;

def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
  [FeatureVolcanicIslands,
   FeatureLDSBankCount16,
   FeatureXNACK]>;

//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//

def FeatureDebuggerInsertNops : SubtargetFeature<
  "amdgpu-debugger-insert-nops",
  "DebuggerInsertNops",
  "true",
  "Insert one nop instruction for each high level source statement"
>;

def FeatureDebuggerReserveRegs : SubtargetFeature<
  "amdgpu-debugger-reserve-regs",
  "DebuggerReserveRegs",
  "true",
  "Reserve registers for debugger usage"
>;

def FeatureDebuggerEmitPrologue : SubtargetFeature<
  "amdgpu-debugger-emit-prologue",
  "DebuggerEmitPrologue",
  "true",
  "Emit debugger prologue"
>;

//===----------------------------------------------------------------------===//

def AMDGPUInstrInfo : InstrInfo {
  let guessInstructionProperties = 1;
  let noNamedPositionallyEncodedOperands = 1;
}

def AMDGPUAsmParser : AsmParser {
  // Some of the R600 registers have the same name, so this crashes.
  // For example T0_XYZW and T0_XY both have the asm name T0.
  let ShouldEmitMatchRegisterName = 0;
}

def AMDGPUAsmWriter : AsmWriter {
  int PassSubtarget = 1;
}

// Names and numeric IDs of the assembler variants. The AsmParserVariant
// records below reference the Default, VOP3, SDWA and DPP entries; no
// AsmParserVariant in this file references Disable.
def AMDGPUAsmVariants {
  string Default = "Default";
  int Default_ID = 0;
  string VOP3 = "VOP3";
  int VOP3_ID = 1;
  string SDWA = "SDWA";
  int SDWA_ID = 2;
  string DPP = "DPP";
  int DPP_ID = 3;
  string Disable = "Disable";
  int Disable_ID = 4;
}

def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.Default_ID;
  let Name = AMDGPUAsmVariants.Default;
}

def VOP3AsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.VOP3_ID;
  let Name = AMDGPUAsmVariants.VOP3;
}

def SDWAAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.SDWA_ID;
  let Name = AMDGPUAsmVariants.SDWA;
}

def DPPAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.DPP_ID;
  let Name = AMDGPUAsmVariants.DPP;
}

def AMDGPU : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDGPUInstrInfo;
  let AssemblyParsers = [AMDGPUAsmParser];
  let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
                                VOP3AsmParserVariant,
                                SDWAAsmParserVariant,
                                DPPAsmParserVariant];
  let AssemblyWriters = [AMDGPUAsmWriter];
}

// Dummy Instruction itineraries for pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;

//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//

def TruePredicate : Predicate<"true">;

def isSICI : Predicate<
  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
>, AssemblerPredicate<"FeatureGCN1Encoding">;

def isVI : Predicate <
  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
  AssemblerPredicate<"FeatureGCN3Encoding">;

def isCIVI : Predicate <
  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
  "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
>, AssemblerPredicate<"FeatureCIInsts">;

def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;

def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;

// Mix-in that assembles the final Predicates list from SubtargetPredicate,
// AssemblerPredicate, AssemblerPredicates and OtherPredicates, in that order.
// SubtargetPredicate has no default here.
class PredicateControl {
  Predicate SubtargetPredicate;
  Predicate SIAssemblerPredicate = isSICI;
  Predicate VIAssemblerPredicate = isVI;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
                                            AssemblerPredicates,
                                            OtherPredicates);
}

// Include AMDGPU TD files
include "R600Schedule.td"
include "SISchedule.td"
include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"
include "AMDGPUCallingConv.td"