//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

// Pull in the tablegen-generated named-operand lookup tables
// (getNamedOperandIdx and friends).
#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// NOTE(review): assumes Width < 32 (1 << 32 would be UB); all callers in
/// this file use small field widths (2-4 bits).
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Clear the destination field, then merge in the masked source value.
  // The ~(1 << Shift) term is redundant whenever Width >= 1 (bit 'Shift' is
  // already covered by the mask), but is kept as-is.
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

// Field positions of the waitcnt counters inside the encoded operand.
// On GFX9 (Major >= 9) vmcnt has 2 extra high bits at [15:14] in addition
// to the 4 low bits at [3:0] -- see the encode/decode helpers below.

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous

namespace llvm {

// Off by default: packed v2f16/v2i16 inline literals are gated behind this
// command-line flag (see isInlinableLiteralV216 below).
static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

/// Map the subtarget feature bits to a {Major, Minor, Stepping} ISA version.
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // SI.
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_3))
    return {9, 0, 3};

  // No exact ISA-version feature set: non-GCN targets and SI report {0,0,0};
  // anything else falls back to the generic CI version.
  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

/// \returns Wavefront size in threads (64 unless a size feature overrides it).
unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

/// \returns LDS size in bytes, or 0 if no local-memory-size feature is set.
unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

/// \returns Number of execution units per compute unit (fixed at 4).
unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  // 40 waves per CU total; a work group occupying N waves limits how many
  // groups fit, capped at 16.
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  // Distribute the work group's waves over the CU's execution units,
  // rounding up.
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

/// \returns Number of wavefronts needed to cover \p FlatWorkGroupSize
/// work items (rounded up to a whole wavefront).
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  // Targets with the SGPR-init hardware bug are limited to a fixed count.
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

/// \returns Smallest SGPR count that still limits occupancy to
/// \p WavesPerEU, or 0 if the requested occupancy is already the maximum.
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  // One more register than fits at (WavesPerEU + 1) occupancy, rounded down
  // to the allocation granule.
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

/// \returns Largest SGPR count that still permits \p WavesPerEU waves per EU.
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  // VI+ allocation (as opposed to addressing) can use up to 112 SGPRs.
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

/// VGPR analogue of getMinNumSGPRs.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

/// VGPR analogue of getMaxNumSGPRs.
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

/// Zero-initialize \p Header and fill in the defaults for the target
/// described by \p Features.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

/// \returns True if \p GV lives in the LDS (local) address space.
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

/// \returns True if \p GV lives in the global address space.
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

/// \returns True if \p GV lives in the constant address space.
bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

/// Constants go to .text on every OS except AMDHSA.
bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

/// Parse the string function attribute \p Name as an integer.
/// \returns \p Default if the attribute is absent; emits a context error
/// (and still returns the parsed-so-far/default value) if malformed.
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

/// Parse the string function attribute \p Name as a comma-separated pair of
/// integers. With \p OnlyFirstRequired, the second component may be omitted
/// (then \p Default.second is kept).
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

/// \returns Mask of all-ones vmcnt values; GFX9 adds 2 high bits.
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

/// \returns Mask covering every counter field of the waitcnt encoding.
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

/// Extract vmcnt from \p Waitcnt, recombining the GFX9 split field.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

/// Decode all three counters from an encoded \p Waitcnt value.
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

/// Pack \p Vmcnt into \p Waitcnt, splitting into lo/hi fields on GFX9.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

/// Encode all three counters. Starts from the all-ones field mask so any
/// bits outside the packed fields read as "no wait".
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

/// \returns True for the graphics-shader calling conventions.
bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

/// \returns True for anything that is not a non-compute shader
/// (i.e. kernels, plain functions, and AMDGPU_CS itself).
bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

/// \returns True for calling conventions that are hardware entry points
/// (kernels and shader stages), as opposed to callable functions.
bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
    return true;
  default:
    return false;
  }
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

/// \returns True if \p Reg (or, for wider registers, its first 32-bit
/// subregister) belongs to the scalar register file, or is SCC.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

/// \returns True if \p Reg0 and \p Reg1 overlap (alias) in any way.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

/// Map a pseudo FLAT_SCR register onto the subtarget-specific MC register
/// (the _ci / _vi variants). All other registers pass through unchanged.
/// Not valid on SI (no flat scratch there), hence the asserts.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {

  switch(Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

/// Inverse of getMCReg: fold subtarget-specific FLAT_SCR variants back to
/// the pseudo register. (FLAT_SCR resolves to AMDGPU::FLAT_SCR here since
/// we are inside namespace AMDGPU.)
unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

/// \returns True if operand \p OpNo of \p Desc is any SI source operand kind.
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

/// \returns True if operand \p OpNo of \p Desc is a floating-point source.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

/// \returns True if operand \p OpNo of \p Desc only accepts inline constants.
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

/// Convenience overload taking the register class itself.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

/// \returns Size in bytes of the register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

/// \returns True if \p Literal can be encoded as a 64-bit inline constant:
/// small integers [-16, 64] or one of the fixed FP bit patterns.
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1/(2*pi), gated on target
}

/// 32-bit counterpart of isInlinableLiteral64.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi)

}

/// 16-bit counterpart; only meaningful on targets with the inv-2pi constant
/// (which is also the first generation with 16-bit inline immediates).
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

/// A packed v2f16/v2i16 literal is inlinable iff both halves are identical
/// and themselves inlinable. Gated behind -enable-packed-inlinable-literals.
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

/// \returns True if argument \p A is known to arrive in an SGPR
/// (and is therefore uniform).
bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

/// Convert a byte offset to the encoding used by SMRD instructions:
/// GCN3 encodes bytes directly, older encodings use dword units.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

/// \returns True if \p ByteOffset fits the SMRD immediate-offset field
/// (20 bits on GCN3, 8 bits before that).
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
} // end namespace AMDGPU

} // end namespace llvm

// Out-of-class definitions for the static address-space members declared in
// AMDGPUAS. NOTE(review): presumably needed because the in-class declarations
// are static const/constexpr and may be ODR-used pre-C++17 -- confirm against
// the header.
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

/// Address-space numbering depends on the triple's environment: the
/// "amdgiz"/"amdgizcl" (generic-is-zero) environments place the flat address
/// space at 0, otherwise private is 0 and flat is 4.
AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  }
  else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm