//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous
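
// For reference, the helpers above describe this layout of the s_waitcnt
// immediate (the values follow directly from the shift/width getters in the
// anonymous namespace):
//
//   bits  3:0   vmcnt (low bits)
//   bits  6:4   expcnt
//   bits 11:8   lgkmcnt
//   bits 15:14  vmcnt (high bits, GFX9 and later only)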

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // SI.
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_3))
    return {9, 0, 3};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}
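
// Worked example for the helpers above (the numbers follow directly from the
// functions, assuming a GCN target with the default 64-thread wavefront): a
// flat work group size of 256 gives getWavesPerWorkGroup = 256 / 64 = 4, so
// getMaxWorkGroupsPerCU returns std::min(40 / 4, 16u) = 10 work groups per CU.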

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
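
// Worked example for the SGPR budget above: with an ISA major version >= 8,
// getTotalNumSGPRs is 800 and getSGPRAllocGranule is 16, so at 10 waves per
// EU getMaxNumSGPRs computes alignDown(800 / 10, 16) = 80, well under the
// addressable limit of 102.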

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}
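
// Worked example for the encoding routines below (pre-GFX9 field layout):
//   encodeWaitcnt(Version, /*Vmcnt=*/1, /*Expcnt=*/2, /*Lgkmcnt=*/3)
//     == (3 << 8) | (2 << 4) | 1 == 0x321
// decodeWaitcnt recovers the three field values from that immediate.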

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
    return true;
  default:
    return false;
  }
}
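
// Note the relationship between the three predicates above: every shader CC
// is also an entry-function CC, the kernel CCs are entry functions but not
// shaders, and isCompute accepts everything except the non-CS graphics
// shader CCs.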

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1)
      return true;
  }
  return false;
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return AMDGPU::FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
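
// The range checks in isSISrcOperand and isSISrcInlinableOperand rely on the
// OPERAND_* enumerators from SIDefines.h being laid out contiguously, with
// the *_FIRST and *_LAST values bracketing their respective groups.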

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
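
// Example: the 64-bit pattern 0x3FF0000000000000 (1.0) is inlinable above,
// while the nearby pattern 0x3FF0000000000001 is not; only the exact bit
// patterns listed (or integers in [-16, 64]) qualify. The same principle
// applies to the 32-bit and 16-bit variants below.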

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(),
                                                Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}
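
// Worked example for the SMRD helpers: a byte offset of 1020 encodes as
// 1020 >> 2 = 255 on SI/CI (dword units) and fits the 8-bit immediate checked
// below, while 1024 encodes as 256 and does not; on VI and later the byte
// offset is used directly and any value below 2^20 is legal.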
isUInt<8>(EncodedOffset) : 785 isUInt<20>(EncodedOffset); 786 } 787 } // end namespace AMDGPU 788 789 } // end namespace llvm 790 791 const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; 792 const unsigned AMDGPUAS::GLOBAL_ADDRESS; 793 const unsigned AMDGPUAS::LOCAL_ADDRESS; 794 const unsigned AMDGPUAS::PARAM_D_ADDRESS; 795 const unsigned AMDGPUAS::PARAM_I_ADDRESS; 796 const unsigned AMDGPUAS::CONSTANT_BUFFER_0; 797 const unsigned AMDGPUAS::CONSTANT_BUFFER_1; 798 const unsigned AMDGPUAS::CONSTANT_BUFFER_2; 799 const unsigned AMDGPUAS::CONSTANT_BUFFER_3; 800 const unsigned AMDGPUAS::CONSTANT_BUFFER_4; 801 const unsigned AMDGPUAS::CONSTANT_BUFFER_5; 802 const unsigned AMDGPUAS::CONSTANT_BUFFER_6; 803 const unsigned AMDGPUAS::CONSTANT_BUFFER_7; 804 const unsigned AMDGPUAS::CONSTANT_BUFFER_8; 805 const unsigned AMDGPUAS::CONSTANT_BUFFER_9; 806 const unsigned AMDGPUAS::CONSTANT_BUFFER_10; 807 const unsigned AMDGPUAS::CONSTANT_BUFFER_11; 808 const unsigned AMDGPUAS::CONSTANT_BUFFER_12; 809 const unsigned AMDGPUAS::CONSTANT_BUFFER_13; 810 const unsigned AMDGPUAS::CONSTANT_BUFFER_14; 811 const unsigned AMDGPUAS::CONSTANT_BUFFER_15; 812 const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; 813 814 namespace llvm { 815 namespace AMDGPU { 816 817 AMDGPUAS getAMDGPUAS(Triple T) { 818 auto Env = T.getEnvironmentName(); 819 AMDGPUAS AS; 820 if (Env == "amdgiz" || Env == "amdgizcl") { 821 AS.FLAT_ADDRESS = 0; 822 AS.PRIVATE_ADDRESS = 5; 823 AS.REGION_ADDRESS = 4; 824 } 825 else { 826 AS.FLAT_ADDRESS = 4; 827 AS.PRIVATE_ADDRESS = 0; 828 AS.REGION_ADDRESS = 5; 829 } 830 return AS; 831 } 832 833 AMDGPUAS getAMDGPUAS(const TargetMachine &M) { 834 return getAMDGPUAS(M.getTargetTriple()); 835 } 836 837 AMDGPUAS getAMDGPUAS(const Module &M) { 838 return getAMDGPUAS(Triple(M.getTargetTriple())); 839 } 840 } // namespace AMDGPU 841 } // namespace llvm 842