//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUTargetTransformInfo.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

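// The getters below describe where each field of the s_waitcnt immediate
// lives; the layout they encode, and which the pack/unpack helpers above
// operate on, is:
//
//   bits [3:0]    vmcnt (lo)
//   bits [6:4]    expcnt
//   bits [11:8]   lgkmcnt
//   bits [15:14]  vmcnt (hi) - only used when Version.Major >= 9, extending
//                 vmcnt to 6 bits (see the decode/encode helpers further down).
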
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

LLVM_READNONE
static inline Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

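// getMaskedMIMGOp() and getMaskedMIMGAtomicOp() return the opcode variant
// whose vdata register class matches a new channel count, or -1 if no such
// variant exists.  A rough usage sketch (not taken from a specific caller)
// for an image load whose dmask was reduced to NewChannels set bits:
//
//   int NewOpc = AMDGPU::getMaskedMIMGOp(MII, MI.getOpcode(), NewChannels);
//   if (NewOpc != -1 && NewOpc != (int)MI.getOpcode())
//     /* rewrite MI to NewOpc with a correspondingly narrower vdata reg */;
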
int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc,
                    unsigned NewChannels) {
  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc,
                          unsigned NewChannels) {
  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1);
  assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4);

  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  assert(OrigChannels == 1 || OrigChannels == 2 || OrigChannels == 4);

  if (NewChannels == OrigChannels)
    return Opc;

  if (OrigChannels <= 2 && NewChannels <= 2) {
    // This is an ordinary atomic (not an atomic_cmpswap)
    return (OrigChannels == 1) ? AMDGPU::getMIMGAtomicOp1(Opc)
                               : AMDGPU::getMIMGAtomicOp2(Opc);
  } else if (OrigChannels >= 2 && NewChannels >= 2) {
    // This is an atomic_cmpswap
    return (OrigChannels == 2) ? AMDGPU::getMIMGAtomicOp1(Opc)
                               : AMDGPU::getMIMGAtomicOp2(Opc);
  } else { // invalid OrigChannels/NewChannels value
    return -1;
  }
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};
  if (Features.test(FeatureSeaIslands))
    return {7, 0, 0};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};
  if (Features.test(FeatureVolcanicIslands))
    return {8, 0, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureGFX9))
    return {9, 0, 0};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

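// The helpers below describe the per-wave register budget that limits
// occupancy.  The values compose directly; for example, on a GFX8 subtarget
// (800 SGPRs total, allocation granule 16), a kernel that must sustain 10
// waves per EU can use at most alignDown(800 / 10, 16) = 80 SGPRs according
// to getMaxNumSGPRs(), before the addressable limit is applied.
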
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

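// The functions below build and decode the s_waitcnt immediate using the
// field layout described near the top of this file.  For example, on a
// pre-gfx9 target:
//
//   IsaInfo::IsaVersion ISA = {8, 0, 3};  // sample version for illustration
//   unsigned W = encodeWaitcnt(ISA, /*Vmcnt=*/3, /*Expcnt=*/7, /*Lgkmcnt=*/15);
//   // W == 0xF73: vmcnt in bits [3:0], expcnt in [6:4], lgkmcnt in [11:8];
//   // decodeWaitcnt(ISA, W, ...) recovers the three values.
//
// By convention a counter that should not be waited on is encoded at its
// maximum value (see getVmcntBitMask() and friends).
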
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

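// MAP_REG2REG expands to the body of getMCReg() and mc2PseudoReg() below.
// The two CASE_* macros are redefined before each use so the same register
// list maps in both directions: for getMCReg() they translate a generic
// pseudo register (TTMP0, FLAT_SCR, ...) to the subtarget-specific encoded
// register (TTMP0_vi, TTMP0_gfx9, FLAT_SCR_ci, ...); for mc2PseudoReg() they
// translate any of the encoded variants back to the pseudo register.
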
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level).  Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

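// The isInlinableLiteral* helpers below answer whether a literal can be
// encoded as one of the hardware's inline constants instead of taking an
// extra literal dword: the integers -16..64, a small set of floating-point
// values (0.0, +-0.5, +-1.0, +-2.0, +-4.0) in the operand's width, and, when
// the subtarget provides it (HasInv2Pi), 1/(2*pi) - 0x3fc45f306dc9c882 as a
// double, 0x3e22f983 as a float, 0x3118 as a half.
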
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval.  Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

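// SMRD/SMEM immediate offsets are encoded differently depending on the
// instruction encoding generation: with the GCN3 encoding (VI and later) the
// instruction takes a 20-bit byte offset directly, while the older encodings
// (SI/CI) take an 8-bit offset in dwords, which is why the byte offset is
// shifted right by 2 below.
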
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  AMDGPUAS AS;
  AS.FLAT_ADDRESS = 0;
  AS.PRIVATE_ADDRESS = 5;
  AS.REGION_ADDRESS = 2;
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergenceByIntr(unsigned Intr);

#define GET_SOURCEOFDIVERGENCE_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergenceByIntr(IntrID);
}
} // namespace AMDGPU
} // namespace llvm