//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
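
// Taken together, the helpers above describe the s_waitcnt simm16 layout:
// vmcnt occupies bits [3:0], expcnt bits [6:4] and lgkmcnt bits [11:8]; on
// GFX9 vmcnt grows to 6 bits, with its two high bits stored in [15:14].
// For example, a GFX9 vmcnt value of 0b110101 is split into 0b0101 (lo field)
// and 0b11 (hi field) by the pack/unpack helpers.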

} // end namespace anonymous

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

LLVM_READNONE
static inline Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}
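
// Note: the stream produced below has the form
//   <arch>-<vendor>-<os>-<environment>-gfx<major><minor><stepping>,
// e.g. "amdgcn-amd-amdhsa--gfx803" for an ISA version of {8, 0, 3} (the
// environment component is empty for a typical HSA triple).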
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}
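
// Worked example for the occupancy helpers above: with 64-wide wavefronts, a
// flat work-group size of 256 gives getWavesPerWorkGroup() == 4, so
// getMaxWavesPerCU(Features, 256) == 4 and
// getMaxWavesPerEU(Features, 256) == alignTo(4, 4) / 4 == 1.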
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
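
// Worked example for the SGPR helpers above, assuming a GFX8 target without
// FeatureSGPRInitBug: at 10 waves per EU, getMaxNumSGPRs(Features, 10, true)
// == min(alignDown(800 / 10, 16), 102) == 80; at 8 waves per EU,
// getMinNumSGPRs(Features, 8) == min(alignDown(800 / 9, 16) + 1, 102) == 81.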
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}
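
// For reference, the masks computed above are 0xF (vmcnt), 0x7 (expcnt) and
// 0xF (lgkmcnt), with getWaitcntBitMask() == 0xF7F before GFX9; on GFX9 the
// extra vmcnt bits extend these to 0x3F and 0xCF7F respectively.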
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG
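
// Example of the mapping implemented above: getMCReg(TTMP0, STI) yields
// TTMP0_gfx9 on GFX9 and TTMP0_vi otherwise, while getMCReg(FLAT_SCR, STI)
// yields FLAT_SCR_ci on CI and FLAT_SCR_vi on VI; mc2PseudoReg() undoes the
// mapping, returning the generic pseudo register for any of these encodings.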

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
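
// For example, the packed literal 0x40004000 (2.0 in both halves) is
// inlinable when packed inlinable literals are enabled, while 0x3C004000
// (1.0 in the high half, 2.0 in the low half) is not, because the two halves
// differ.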

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
      isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  } else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm