//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end anonymous namespace
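// A worked example of the layout these helpers describe (illustrative; the
// field positions follow directly from the shift/width values above):
//
//   s_waitcnt SIMM16:  bits  3:0   vmcnt (lower bits)
//                      bits  6:4   expcnt
//                      bits 11:8   lgkmcnt
//                      bits 15:14  vmcnt (upper bits, gfx9 onwards)
//
// For instance, packBits(3, 0, getExpcntBitShift(), getExpcntBitWidth())
// yields 0x30, and unpackBits(0x30, getExpcntBitShift(), getExpcntBitWidth())
// recovers 3.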
namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // SI.
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_3))
    return {9, 0, 3};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}
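// A worked example of how the helpers above combine (illustrative, assuming
// a wave64 GCN subtarget): for FlatWorkGroupSize = 256,
// getWavesPerWorkGroup returns alignTo(256, 64) / 64 = 4, so
// getMaxWavesPerEU(Features, 256) = alignTo(4, 4) / 4 = 1 and
// getMaxWorkGroupsPerCU(Features, 256) = min(40 / 4, 16u) = 10.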
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
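// A worked example for the register helpers above (illustrative, assuming a
// pre-gfx8 subtarget without FeatureSGPRInitBug, i.e. 512 total SGPRs and an
// allocation granule of 8): getMaxNumSGPRs(Features, /*WavesPerEU=*/10,
// /*Addressable=*/true) returns min(alignDown(512 / 10, 8), 104) = 48.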
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}
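// For reference (illustrative values implied by the widths above):
// getVmcntBitMask yields 0xf before gfx9 and 0x3f (4 low plus 2 high bits)
// from gfx9 onwards, while getExpcntBitMask and getLgkmcntBitMask yield
// 0x7 and 0xf respectively.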
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
                  getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}
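// Illustrative uses of the predicates above: isShader and isCompute both
// return true for CallingConv::AMDGPU_CS, isCompute(CallingConv::AMDGPU_PS)
// is false, and isEntryFunctionCC(CallingConv::AMDGPU_KERNEL) is true.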
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1)
      return true;
  }
  return false;
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return AMDGPU::FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
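// Illustrative use of the operand predicates above: for an operand whose
// type is AMDGPU::OPERAND_REG_IMM_FP32, isSISrcOperand and isSISrcFPOperand
// both return true, while isSISrcInlinableOperand returns false because the
// OPERAND_REG_INLINE_C_* range covers only inline-constant operands.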
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
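// Illustrative: isInlinableLiteral64(0x3FE0000000000000, false) returns true
// because the bits match DoubleToBits(0.5), whereas the 1/(2*pi) pattern
// 0x3fc45f306dc9c882 is accepted only when HasInv2Pi is set.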
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}
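// Illustrative: before GCN3 the SMRD immediate is encoded in dwords, so a
// byte offset of 40 encodes as 10 and must fit in 8 bits; on GCN3 it stays
// a byte offset and the legality check below allows up to 20 bits.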
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
      isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  } else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

} // end namespace AMDGPU
} // end namespace llvm