//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h" // alignTo, isUInt, FloatToBits, DoubleToBits
#include <cstring>                   // memset

#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM

#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // end anonymous namespace
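// Illustration of the s_waitcnt field layout implied by the shifts and widths
// above (worked out by hand from getBitMask/packBits/unpackBits; the values
// are illustrative, not quoted from the ISA manual):
//
//   vmcnt   bits [3:0]   getBitMask(0, 4) == 0x00F
//   expcnt  bits [6:4]   getBitMask(4, 3) == 0x070
//   lgkmcnt bits [11:8]  getBitMask(8, 4) == 0xF00
//
//   unpackBits(0xF70, 4, 3)    == 0x7    // read expcnt out of 0xF70
//   packBits(0x0, 0xF7F, 0, 4) == 0xF70  // overwrite vmcnt with 0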
namespace llvm {
namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}
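// Worked example of the occupancy arithmetic built from the functions above
// (assuming a GCN target, so 64-wide wavefronts and 4 EUs per CU; the numbers
// are derived from this file, not from hardware documentation):
//
//   getMaxWavesPerCU(Features) == getMaxWavesPerEU(Features) *
//                                 getEUsPerCU(Features) == 10 * 4 == 40
//
//   For FlatWorkGroupSize == 192:
//     getWavesPerWorkGroup(Features, 192) == alignTo(192, 64) / 64 == 3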
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8) {
    switch (WavesPerEU) {
    case 0: return 0;
    case 10: return 0;
    case 9: return 0;
    case 8: return 81;
    default: return 97;
    }
  } else {
    switch (WavesPerEU) {
    case 0: return 0;
    case 10: return 0;
    case 9: return 49;
    case 8: return 57;
    case 7: return 65;
    case 6: return 73;
    case 5: return 81;
    default: return 97;
    }
  }
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8) {
    switch (WavesPerEU) {
    case 0: return 80;
    case 10: return 80;
    case 9: return 80;
    case 8: return 96;
    default: return Addressable ? getAddressableNumSGPRs(Features) : 112;
    }
  } else {
    switch (WavesPerEU) {
    case 0: return 48;
    case 10: return 48;
    case 9: return 56;
    case 8: return 64;
    case 7: return 72;
    case 6: return 80;
    case 5: return 96;
    default: return getAddressableNumSGPRs(Features);
    }
  }
}
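// Reading the SGPR tables above for a VI target (Version.Major >= 8), as an
// illustration only:
//
//   getMaxNumSGPRs(Features, /*WavesPerEU=*/10, true) == 80
//   getMaxNumSGPRs(Features, /*WavesPerEU=*/8, true)  == 96
//
// i.e. lowering the target occupancy from 10 to 8 waves/EU buys a kernel 16
// extra SGPRs. WavesPerEU == 0 is treated like the maximum (10 waves/EU).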
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  switch (WavesPerEU) {
  case 0: return 0;
  case 10: return 0;
  case 9: return 25;
  case 8: return 29;
  case 7: return 33;
  case 6: return 37;
  case 5: return 41;
  case 4: return 49;
  case 3: return 65;
  case 2: return 85;
  default: return 129;
  }
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  switch (WavesPerEU) {
  case 0: return 24;
  case 10: return 24;
  case 9: return 28;
  case 8: return 32;
  case 7: return 36;
  case 6: return 40;
  case 5: return 48;
  case 4: return 64;
  case 3: return 84;
  case 2: return 128;
  default: return getTotalNumVGPRs(Features);
  }
}

} // namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
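// Decoding the log2-encoded fields set above (plain arithmetic; the field
// semantics themselves come from the amd_kernel_code_t specification):
//
//   wavefront_size == 6            -> 1 << 6 == 64 lanes per wavefront
//   kernarg_segment_alignment == 4 -> 1 << 4 == 16-byte alignment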
MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL);
}

MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
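// Example of the parsing rules above, using a made-up attribute name purely
// for illustration:
//
//   "my-pair-attr"="2,4" -> {2, 4}
//   "my-pair-attr"="2"   -> {2, Default.second} if OnlyFirstRequired is set
//   "my-pair-attr"="2,x" -> emits "can't parse second integer attribute"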
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getVmcntBitWidth()) - 1;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  return Vmcnt | Expcnt | Lgkmcnt;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  // Start from the all-ones mask so that any counter the caller leaves at its
  // maximum value does not cause an additional wait.
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
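// Worked round trip through the encoders above (values computed from the
// shifts and widths in this file, which cover the SI/CI/VI layout):
//
//   unsigned W = encodeWaitcnt(V, /*Vmcnt=*/0, /*Expcnt=*/0x7, /*Lgkmcnt=*/0xF);
//   // W == 0xF70: expcnt and lgkmcnt remain at their "no wait" maxima and
//   // only vmcnt is zeroed -- the payload of "s_waitcnt vmcnt(0)".
//   assert(decodeVmcnt(V, W) == 0);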
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
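// The HasInv2Pi bit patterns accepted here and in the 32-/16-bit variants
// below are, by direct IEEE-754 decoding, 1/(2*pi) at each width:
// 0x3fc45f306dc9c882 (f64), 0x3e22f983 (f32), and 0x3118 (f16); e.g.
// BitsToFloat(0x3e22f983) ~= 0.15915494 ~= 1/(2*pi).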
632 // 633 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 634 // floating-point, so it is a legal inline immediate. 635 636 uint32_t Val = static_cast<uint32_t>(Literal); 637 return (Val == FloatToBits(0.0f)) || 638 (Val == FloatToBits(1.0f)) || 639 (Val == FloatToBits(-1.0f)) || 640 (Val == FloatToBits(0.5f)) || 641 (Val == FloatToBits(-0.5f)) || 642 (Val == FloatToBits(2.0f)) || 643 (Val == FloatToBits(-2.0f)) || 644 (Val == FloatToBits(4.0f)) || 645 (Val == FloatToBits(-4.0f)) || 646 (Val == 0x3e22f983 && HasInv2Pi); 647 } 648 649 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { 650 if (!HasInv2Pi) 651 return false; 652 653 if (Literal >= -16 && Literal <= 64) 654 return true; 655 656 uint16_t Val = static_cast<uint16_t>(Literal); 657 return Val == 0x3C00 || // 1.0 658 Val == 0xBC00 || // -1.0 659 Val == 0x3800 || // 0.5 660 Val == 0xB800 || // -0.5 661 Val == 0x4000 || // 2.0 662 Val == 0xC000 || // -2.0 663 Val == 0x4400 || // 4.0 664 Val == 0xC400 || // -4.0 665 Val == 0x3118; // 1/2pi 666 } 667 668 bool isUniformMMO(const MachineMemOperand *MMO) { 669 const Value *Ptr = MMO->getValue(); 670 // UndefValue means this is a load of a kernel input. These are uniform. 671 // Sometimes LDS instructions have constant pointers. 672 // If Ptr is null, then that means this mem operand contains a 673 // PseudoSourceValue like GOT. 674 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 675 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 676 return true; 677 678 const Instruction *I = dyn_cast<Instruction>(Ptr); 679 return I && I->getMetadata("amdgpu.uniform"); 680 } 681 682 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 683 if (isSI(ST) || isCI(ST)) 684 return ByteOffset >> 2; 685 686 return ByteOffset; 687 } 688 689 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 690 int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); 691 return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : 692 isUInt<20>(EncodedOffset); 693 } 694 695 } // End namespace AMDGPU 696 } // End namespace llvm 697