//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end anonymous namespace
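
// For orientation, a summary derived from the shift/width helpers above (a
// reading aid, not separate hardware documentation): the s_waitcnt immediate
// packs vmcnt into bits [3:0], expcnt into bits [6:4], and lgkmcnt into bits
// [11:8]; on gfx9 and later the two high vmcnt bits live in bits [15:14].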

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // SI.
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_3))
    return {9, 0, 3};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}
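
// A worked example of the occupancy helpers above (illustrative only): with
// the default 64-wide wavefront, a flat workgroup size of 256 gives
// getWavesPerWorkGroup == 4; on GCN, getMaxWorkGroupsPerCU then permits
// 40 / 4 == 10 workgroups per CU, and getMaxWavesPerEU(Features, 256)
// evaluates to alignTo(4, 4) / 4 == 1.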

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
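
// Round-trip sketch (illustrative, not an upstream comment): on a pre-gfx9
// target,
//   unsigned W = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7,
//                              /*Lgkmcnt=*/15);
// yields W == 0xf70, and decodeWaitcnt(Version, W, Vmcnt, Expcnt, Lgkmcnt)
// recovers the three fields. encodeWaitcnt seeds the result with
// getWaitcntBitMask, so any field left unencoded defaults to its all-ones
// "don't wait" value.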

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1)
      return true;
  }
  return false;
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return AMDGPU::FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}
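
// Example (illustrative, not an upstream comment): pre-GCN3 targets encode
// the SMRD immediate in dwords, so a byte offset of 1024 encodes as 256 and
// no longer fits the 8-bit field checked below, while GCN3's 20-bit byte
// offset accepts it unchanged.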

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
      isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  } else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

} // end namespace AMDGPU
} // end namespace llvm