//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h" // needed for the cl::opt declared below
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

LLVM_READNONE
static inline Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
                                getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  }
  else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm