//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous
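
// Taken together, the helpers above describe the layout of the s_waitcnt
// immediate operand that the encode/decode routines further down operate on
// (illustrative summary only):
//
//   bits  3:0   vmcnt (low bits)
//   bits  6:4   expcnt
//   bits 11:8   lgkmcnt
//   bits 15:14  vmcnt (high bits, GFX9 and later only)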

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}
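
// Worked example (illustrative only): with the default 64-wide wavefront and
// FlatWorkGroupSize = 256, getWavesPerWorkGroup returns alignTo(256, 64) / 64
// = 4, so getMaxWavesPerCU(Features, 256) is 4 and
// getMaxWavesPerEU(Features, 256) is alignTo(4, 4) / 4 = 1.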

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
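
// Worked example (illustrative only): on a GFX8 target without the SGPR init
// bug (800 physical SGPRs, allocation granule 16), getMaxNumSGPRs at 8 waves
// per EU is alignDown(800 / 8, 16) = 96, which is below the addressable limit
// of 102, so 96 is returned.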

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
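
// Example (illustrative only): for a string function attribute holding "2,4"
// this returns {2, 4}; with OnlyFirstRequired set, a value of just "2" is
// accepted and the second element keeps its default.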

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
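
// Worked example (illustrative only): on GFX9,
//   encodeWaitcnt({9, 0, 0}, /*Vmcnt=*/1, /*Expcnt=*/2, /*Lgkmcnt=*/3)
// yields 0x321, and decodeWaitcnt({9, 0, 0}, 0x321, Vmcnt, Expcnt, Lgkmcnt)
// recovers Vmcnt = 1, Expcnt = 2 and Lgkmcnt = 3.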

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG
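
// Example (illustrative only): on a VI subtarget, getMCReg(AMDGPU::TTMP0, STI)
// resolves the pseudo register TTMP0 to TTMP0_vi, and mc2PseudoReg maps either
// TTMP0_vi or TTMP0_gfx9 back to TTMP0.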

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}
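
// Example (illustrative only): a ByteOffset of 1020 is encoded as 255 (dwords)
// on SI/CI and passed through as 1020 (bytes) on GCN3 encodings, so
// isLegalSMRDImmOffset below accepts it in both cases (8-bit and 20-bit
// offset fields, respectively).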

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  }
  else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm