//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

// Pull in the tablegen-generated named-operand lookup tables
// (getNamedOperandIdx and friends).
#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// NOTE(review): uses a signed 1 for the shift; fine for the small widths
/// used in this file (<= 4 bits), but would be UB for Width == 32.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Clear the destination field, then merge in the (truncated) source bits.
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

// Field layout of the S_WAITCNT immediate:
//   bits [3:0]   vmcnt (low part)
//   bits [6:4]   expcnt
//   bits [11:8]  lgkmcnt
//   bits [15:14] vmcnt (high part, gfx9+ only)

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous

namespace llvm {

// Off by default: packed 16-bit inline literals are gated behind this flag.
static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

/// Maps the subtarget feature bits to a {Major, Minor, Stepping} ISA version.
/// Falls back to {0,0,0} for non-GCN (or SI without an explicit version
/// feature), and to {7,0,0} for any other GCN subtarget.
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // SI.
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_3))
    return {9, 0, 3};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

/// Prints the full ISA target name to \p Stream, e.g.
/// "amdgcn-amd-amdhsa--gfx900" (arch-vendor-os-environment-gfx<MMS>).
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

/// \returns Wavefront size in threads; 64 unless a narrower size feature is
/// explicitly set.
unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

/// \returns LDS size in bytes for the subtarget (0 if no LDS feature is set).
unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

/// \returns Number of execution units per compute unit (fixed at 4 here).
unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

/// \returns Maximum number of work groups of size \p FlatWorkGroupSize that
/// can be resident on one compute unit.
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  // 40 waves per CU total; cap at 16 work groups per CU.
  N = 40 / N;
  return std::min(N, 16u);
}

/// \returns Maximum number of waves per compute unit (EU max times EU count).
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

/// \returns Maximum number of waves per compute unit for a work group of
/// \p FlatWorkGroupSize threads.
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

/// \returns Minimum number of waves per execution unit (always 1).
unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

/// \returns Maximum number of waves per execution unit: 8 on non-GCN,
/// 10 on GCN.
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

/// \returns Maximum number of waves per execution unit for a work group of
/// \p FlatWorkGroupSize threads (per-CU count divided across EUs, rounded up).
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

/// \returns Minimum flat work group size (always 1).
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

/// \returns Maximum flat work group size (always 2048).
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

/// \returns Number of waves needed for a work group of \p FlatWorkGroupSize
/// threads (rounded up to whole wavefronts).
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
      getWavefrontSize(Features);
}

/// \returns SGPR allocation granularity: 16 on gfx8+, 8 before.
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

/// \returns SGPR encoding granularity (always 8).
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

/// \returns Total number of SGPRs: 800 on gfx8+, 512 before.
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

/// \returns Number of SGPRs addressable by a single wave: reduced to a fixed
/// count on subtargets with the SGPR init bug; otherwise 102 on gfx8+,
/// 104 before.
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

/// \returns Minimum number of SGPRs that still limits occupancy to
/// \p WavesPerEU waves (0 if the maximum wave count is already reachable).
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  // One more than the largest count that would still allow WavesPerEU+1 waves.
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

/// \returns Maximum number of SGPRs usable while sustaining \p WavesPerEU
/// waves. When \p Addressable is false on gfx8+, the cap is raised to 112
/// (allocation limit rather than the addressable limit).
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

/// \returns VGPR allocation granularity (always 4).
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

/// \returns VGPR encoding granularity (same as the allocation granularity).
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

/// \returns Total number of VGPRs (always 256).
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

/// \returns Number of VGPRs addressable by a single wave (all of them).
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

/// \returns Minimum number of VGPRs that still limits occupancy to
/// \p WavesPerEU waves (0 if the maximum wave count is already reachable).
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

/// \returns Maximum number of VGPRs usable while sustaining \p WavesPerEU
/// waves.
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

/// Zero-initializes \p Header and fills in the fields that are common to all
/// kernels for the subtarget described by \p Features.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  // Kernel machine code immediately follows the header.
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

/// \returns True if \p GV lives in the LDS (local) address space.
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

/// \returns True if \p GV lives in the global address space.
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

/// \returns True if \p GV lives in the constant address space.
bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

/// \returns True if constants should be emitted to .text (any OS except
/// AMDHSA).
bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

/// Parses the string function attribute \p Name of \p F as an integer.
/// Emits a context error (and returns \p Default) if the string does not
/// parse; returns \p Default if the attribute is absent.
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

/// Parses the string function attribute \p Name of \p F as a pair of
/// comma-separated integers. If \p OnlyFirstRequired is true, an absent
/// second value is not an error and \p Default.second is kept. Any parse
/// error emits a context error and returns \p Default.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // Only an error if a second value was required, or one was supplied but
    // is malformed.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

/// \returns Mask of valid vmcnt values: 4 bits before gfx9, 6 bits (split
/// field) on gfx9+.
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

/// \returns Mask of valid expcnt values (3 bits).
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

/// \returns Mask of valid lgkmcnt values (4 bits).
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

/// \returns Mask of all defined fields of the S_WAITCNT immediate for
/// \p Version (includes the high vmcnt bits only on gfx9+).
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

/// Extracts the vmcnt value from a S_WAITCNT immediate; on gfx9+ this stitches
/// the low 4 bits and the high 2 bits back into a single 6-bit value.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

/// Extracts the expcnt value from a S_WAITCNT immediate.
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// Extracts the lgkmcnt value from a S_WAITCNT immediate.
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

/// Decodes all three counter fields of \p Waitcnt into the out-parameters.
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

/// Packs \p Vmcnt into \p Waitcnt; on gfx9+ the upper 2 bits go into the
/// separate high field.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

/// Packs \p Expcnt into \p Waitcnt.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// Packs \p Lgkmcnt into \p Waitcnt.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

/// Builds a complete S_WAITCNT immediate from the three counter values.
/// Starts from the all-fields-set mask so that any bits not overwritten
/// (including undefined bits) remain at their maximum.
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

/// \returns The "InitialPSInputAddr" integer attribute of \p F (0 if unset).
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

/// \returns True if \p cc is one of the AMDGPU graphics/compute shader
/// calling conventions.
bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

/// \returns True for anything that is not a non-compute shader, i.e. kernels
/// and AMDGPU_CS count as compute.
bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

/// \returns True if \p CC is a kernel or shader entry-point calling
/// convention.
bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

/// \returns True if the subtarget is Southern Islands (gfx6).
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

/// \returns True if the subtarget is Sea Islands (gfx7).
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

/// \returns True if the subtarget is Volcanic Islands (gfx8).
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

/// \returns True if the subtarget is GFX9.
bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

/// \returns True if the subtarget uses the GCN3 instruction encoding.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

/// \returns True if \p Reg is an SGPR (or SCC). For multi-register tuples the
/// first sub-register is checked against the 32-bit SGPR class.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

/// \returns True if \p Reg0 and \p Reg1 overlap (alias) in any way.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

/// Maps a pseudo FLAT_SCR register (or its halves) to the subtarget-specific
/// MC register; all other registers are returned unchanged. FLAT_SCR does not
/// exist on SI, hence the asserts.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {

  switch(Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

/// Inverse of getMCReg: maps a subtarget-specific FLAT_SCR register back to
/// its pseudo register; all other registers are returned unchanged.
unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

/// \returns True if operand \p OpNo of \p Desc is any SI source operand type.
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

/// \returns True if operand \p OpNo of \p Desc is a floating-point source
/// operand type.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

/// \returns True if operand \p OpNo of \p Desc accepts an inline constant.
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns Width in bits of the register class \p RCID.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

/// \returns Width in bits of the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

/// \returns Size in bytes of the register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

/// \returns True if \p Literal can be encoded as a 64-bit inline constant:
/// the integers -16..64 or one of the hardware FP64 constants (plus 1/(2*pi)
/// when \p HasInv2Pi).
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1/(2*pi)
}

/// \returns True if \p Literal can be encoded as a 32-bit inline constant.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi)
}

/// \returns True if \p Literal can be encoded as a 16-bit inline constant.
/// Only subtargets with the inv-2pi constant (gfx8+) support 16-bit inline
/// literals at all, hence the early bail-out.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  // Half-precision bit patterns of the hardware FP constants.
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

/// \returns True if the packed v2i16/v2f16 value \p Literal is inlinable:
/// both halves must be identical and themselves a 16-bit inline constant.
/// Gated behind the -enable-packed-inlinable-literals flag.
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

/// \returns True if argument \p A is passed in an SGPR (and is therefore
/// uniform across the wave).
bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
/// \returns True if the memory operand \p MMO is known to be uniform across
/// the wave.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

/// \returns The SMRD-encoded form of \p ByteOffset: a byte offset on GCN3
/// encodings, a dword offset (byte offset / 4) on earlier encodings.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

/// \returns True if \p ByteOffset fits in an SMRD immediate offset field:
/// 20 bits on GCN3 encodings, 8 bits before.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
} // end namespace AMDGPU

} // end namespace llvm

// Out-of-line definitions of the AMDGPUAS static const members, required so
// they can be ODR-used (pre-C++17 needs a definition for static const data
// members declared in the header).
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

/// \returns The address-space mapping for triple \p T. The "amdgiz" /
/// "amdgizcl" environments use the generic-is-zero mapping (flat = 0);
/// otherwise private = 0 and flat = 4.
AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  }
  else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

/// \returns The address-space mapping for the target machine \p M.
AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

/// \returns The address-space mapping for module \p M (parsed from its
/// target triple string).
AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm